diff --git a/README.md b/README.md index 2d2cfcf..770e335 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ The server listens on `http://localhost:8080` by default. - `PORT`: HTTP port, default `8080` - `HOST`: bind address, default `0.0.0.0` - `DATA_DIR`: disk storage root, default `./data` +- `SITE_URL`: public canonical site URL used for SEO metadata, sitemaps, feeds, and `llms.txt` - `SEED_DEMO_MEMES`: set to `false` to disable generated demo memes on first boot - `ADMIN_TOKEN`: secret review URL token. If omitted, one is generated at boot and printed in server logs. - `OPENAI_API_KEY`: enables AI upload moderation. Without it, uploads are queued for admin review. @@ -33,6 +34,19 @@ data/ meta/YYYY/MM/DD/aa/bb/.json ``` +## Discovery Metadata + +The app serves crawler and answer-engine metadata without adding visible page copy: + +- `/robots.txt` +- `/sitemap.xml` with approved meme image entries +- `/feed.json` +- `/llms.txt` +- `/site.webmanifest` +- Open Graph, Twitter card, canonical, and JSON-LD metadata on `/` + +Set `SITE_URL` in production so canonical URLs use the public domain instead of an internal proxy hostname. 
+ ## Docker ```sh diff --git a/public/index.html b/public/index.html index e11a06c..1236a2a 100644 --- a/public/index.html +++ b/public/index.html @@ -3,11 +3,28 @@ - The Meme Protocol + __SEO_TITLE__ + + + + + + + + + + + + + + + + + diff --git a/server.js b/server.js index ef8c849..fae0492 100644 --- a/server.js +++ b/server.js @@ -1,14 +1,16 @@ import http from 'node:http'; import crypto from 'node:crypto'; +import fs from 'node:fs/promises'; import { URL } from 'node:url'; import { parseMultipartUpload } from './src/multipart.js'; -import { sendFile, sendJson, sendText, withSecurityHeaders } from './src/http.js'; +import { sendFile, sendHtml, sendJson, sendText, withSecurityHeaders } from './src/http.js'; import { createStore } from './src/store.js'; import { validateImage } from './src/image.js'; import { normalizeToWebp } from './src/normalize.js'; import { seedDemoMemes } from './src/seed.js'; import { moderateImage } from './src/moderation.js'; import { createUploadLimiter } from './src/uploadLimits.js'; +import { feedJson, llmsTxt, manifest, noIndex, publicBaseUrl, renderIndex, robotsTxt, sitemapXml } from './src/seo.js'; const PORT = Number.parseInt(process.env.PORT || '8080', 10); const HOST = process.env.HOST || '0.0.0.0'; @@ -18,6 +20,7 @@ const UPLOAD_MAX_BYTES = 5 * 1024 * 1024; const REQUEST_MAX_BYTES = 6 * 1024 * 1024; const events = new Set(); const ADMIN_TOKEN = process.env.ADMIN_TOKEN || crypto.randomBytes(24).toString('hex'); +const indexTemplate = await fs.readFile('./public/index.html', 'utf8'); const store = await createStore({ dataDir: DATA_DIR }); const uploadLimiter = await createUploadLimiter({ dataDir: DATA_DIR }); @@ -33,14 +36,48 @@ const server = http.createServer(async (req, res) => { try { const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`); + const baseUrl = publicBaseUrl(req); if (req.method === 'GET' && url.pathname === '/') { - return sendFile(res, './public/index.html'); + const nonce = 
crypto.randomBytes(16).toString('base64'); + withSecurityHeaders(res, { scriptNonce: nonce }); + return sendHtml(res, 200, renderIndex({ + template: indexTemplate, + baseUrl, + nonce, + approvedCount: store.count('approved') + })); + } + + if (req.method === 'GET' && url.pathname === '/robots.txt') { + return sendText(res, 200, robotsTxt(baseUrl)); + } + + if (req.method === 'GET' && url.pathname === '/llms.txt') { + return sendText(res, 200, llmsTxt(baseUrl)); + } + + if (req.method === 'GET' && url.pathname === '/site.webmanifest') { + return sendJson(res, 200, manifest(baseUrl)); + } + + if (req.method === 'GET' && url.pathname === '/feed.json') { + return sendJson(res, 200, feedJson(baseUrl, store.listForReview({ status: 'approved' }).memes)); + } + + if (req.method === 'GET' && url.pathname === '/sitemap.xml') { + const body = sitemapXml(baseUrl, store.listForReview({ status: 'approved' }).memes); + res.writeHead(200, { + 'Content-Type': 'application/xml; charset=utf-8', + 'Content-Length': String(Buffer.byteLength(body)) + }); + return res.end(body); } const adminPageMatch = url.pathname.match(/^\/admin\/([A-Za-z0-9_-]{24,128})$/); if (req.method === 'GET' && adminPageMatch) { if (!isAdminToken(adminPageMatch[1])) return sendText(res, 404, 'Not found'); + noIndex(res); return sendFile(res, './public/admin.html'); } @@ -49,12 +86,14 @@ const server = http.createServer(async (req, res) => { } if (req.method === 'GET' && url.pathname === '/api/memes') { + noIndex(res); const page = positiveInt(url.searchParams.get('page'), 1); const pageSize = Math.min(positiveInt(url.searchParams.get('pageSize'), 12), PAGE_SIZE_MAX); return sendJson(res, 200, store.list({ page, pageSize })); } if (req.method === 'GET' && url.pathname === '/api/status') { + noIndex(res); return sendJson(res, 200, { ok: true, memeCount: store.count('approved'), @@ -64,11 +103,13 @@ const server = http.createServer(async (req, res) => { } if (req.method === 'GET' && url.pathname === 
'/api/admin/pending') { + noIndex(res); if (!isAdminRequest(req)) return sendJson(res, 404, { error: 'Not found' }); return sendJson(res, 200, store.listForReview({ status: 'pending' })); } if (req.method === 'POST' && url.pathname === '/api/admin/approve') { + noIndex(res); if (!isAdminRequest(req)) return sendJson(res, 404, { error: 'Not found' }); const body = await readJsonBody(req, 64 * 1024); const approved = await store.approve(safeIds(body.ids)); @@ -76,6 +117,7 @@ const server = http.createServer(async (req, res) => { } if (req.method === 'POST' && url.pathname === '/api/admin/delete') { + noIndex(res); if (!isAdminRequest(req)) return sendJson(res, 404, { error: 'Not found' }); const body = await readJsonBody(req, 64 * 1024); const deleted = await store.delete(safeIds(body.ids)); @@ -83,6 +125,7 @@ const server = http.createServer(async (req, res) => { } if (req.method === 'GET' && url.pathname === '/api/events') { + noIndex(res); res.writeHead(200, { 'Content-Type': 'text/event-stream; charset=utf-8', 'Cache-Control': 'no-cache, no-transform', @@ -95,6 +138,7 @@ const server = http.createServer(async (req, res) => { } if (req.method === 'POST' && url.pathname === '/api/memes') { + noIndex(res); const contentLength = Number.parseInt(req.headers['content-length'] || '0', 10); if (!Number.isFinite(contentLength) || contentLength <= 0) { return sendJson(res, 411, { error: 'Missing upload size.' 
}); @@ -162,6 +206,7 @@ const server = http.createServer(async (req, res) => { const adminMediaMatch = url.pathname.match(/^\/admin-media\/([A-Za-z0-9_-]{24,128})\/([a-f0-9]{64})$/); if (req.method === 'GET' && adminMediaMatch) { + noIndex(res); if (!isAdminToken(adminMediaMatch[1])) return sendText(res, 404, 'Not found'); const meme = store.get(adminMediaMatch[2]); if (!meme) return sendText(res, 404, 'Not found'); @@ -174,6 +219,7 @@ const server = http.createServer(async (req, res) => { const downloadMatch = url.pathname.match(/^\/download\/([a-f0-9]{64})$/); if (req.method === 'GET' && downloadMatch) { + noIndex(res); if (store.get(downloadMatch[1])?.status !== 'approved') return sendText(res, 404, 'Not found'); const updated = await store.incrementMetric(downloadMatch[1], 'downloadCount'); const meme = store.get(downloadMatch[1]); @@ -186,6 +232,7 @@ const server = http.createServer(async (req, res) => { } if (req.method === 'GET' && url.pathname === '/healthz') { + noIndex(res); return sendJson(res, 200, { ok: true }); } diff --git a/src/http.js b/src/http.js index eba9769..3a929f5 100644 --- a/src/http.js +++ b/src/http.js @@ -13,15 +13,16 @@ const TYPES = new Map([ ['.ico', 'image/x-icon'] ]); -export function withSecurityHeaders(res) { +export function withSecurityHeaders(res, options = {}) { res.setHeader('Cross-Origin-Opener-Policy', 'same-origin'); res.setHeader('Referrer-Policy', 'same-origin'); res.setHeader('X-Content-Type-Options', 'nosniff'); res.setHeader('X-Frame-Options', 'DENY'); res.setHeader('Permissions-Policy', 'camera=(), microphone=(), geolocation=()'); + const scriptSrc = options.scriptNonce ? 
`script-src 'self' 'nonce-${options.scriptNonce}'; ` : ''; res.setHeader( 'Content-Security-Policy', - "default-src 'self'; style-src 'self' https://fonts.googleapis.com; font-src https://fonts.gstatic.com; img-src 'self' blob:; connect-src 'self'; object-src 'none'; base-uri 'none'; frame-ancestors 'none'; form-action 'self'" + `default-src 'self'; ${scriptSrc}style-src 'self' https://fonts.googleapis.com; font-src https://fonts.gstatic.com; img-src 'self' blob:; connect-src 'self'; object-src 'none'; base-uri 'none'; frame-ancestors 'none'; form-action 'self'` ); } @@ -43,6 +44,15 @@ export function sendText(res, statusCode, message) { res.end(body); } +export function sendHtml(res, statusCode, html) { + const body = Buffer.from(html); + res.writeHead(statusCode, { + 'Content-Type': 'text/html; charset=utf-8', + 'Content-Length': String(body.length) + }); + res.end(body); +} + export function sendFile(res, filePath, options = {}) { const resolved = options.absolute ? path.resolve(filePath) : path.resolve(filePath); if (!options.absolute && !resolved.startsWith(PUBLIC_ROOT + path.sep) && resolved !== path.join(PUBLIC_ROOT, 'index.html')) { diff --git a/src/seo.js b/src/seo.js new file mode 100644 index 0000000..e6f183b --- /dev/null +++ b/src/seo.js @@ -0,0 +1,182 @@ +const SITE_NAME = 'The Meme Protocol'; +const SITE_DESCRIPTION = 'A live, moderated meme stream for The Meme Protocol: square WebP memes, MEME_CONSENSUS_SCORE ranking, and community review.'; +const REPO_URL = 'https://git.yoonect.com/Nautilus/bitsforfree'; + +export function publicBaseUrl(req) { + if (process.env.SITE_URL) return cleanBase(process.env.SITE_URL); + const host = process.env.TRUST_PROXY === 'true' + ? req.headers['x-forwarded-host'] || req.headers.host + : req.headers.host; + const proto = process.env.TRUST_PROXY === 'true' + ? 
req.headers['x-forwarded-proto'] || 'https' + : 'http'; + return cleanBase(`${proto}://${host || 'localhost:8080'}`); +} + +export function renderIndex({ template, baseUrl, nonce, approvedCount }) { + const canonical = `${baseUrl}/`; + const image = `${baseUrl}/assets/yoonect-logo.png`; + const jsonLd = { + '@context': 'https://schema.org', + '@graph': [ + { + '@type': 'WebSite', + '@id': `${canonical}#website`, + name: SITE_NAME, + url: canonical, + description: SITE_DESCRIPTION, + inLanguage: 'en' + }, + { + '@type': 'CollectionPage', + '@id': `${canonical}#collection`, + name: SITE_NAME, + url: canonical, + description: SITE_DESCRIPTION, + isPartOf: { '@id': `${canonical}#website` }, + about: ['memes', 'internet culture', 'image gallery', 'community moderation'], + numberOfItems: approvedCount + }, + { + '@type': 'SoftwareApplication', + name: SITE_NAME, + applicationCategory: 'MultimediaApplication', + operatingSystem: 'Web', + url: canonical, + codeRepository: REPO_URL, + offers: { '@type': 'Offer', price: '0', priceCurrency: 'USD' } + } + ] + }; + + return template + .replaceAll('__SEO_TITLE__', escapeHtml(SITE_NAME)) + .replaceAll('__SEO_DESCRIPTION__', escapeHtml(SITE_DESCRIPTION)) + .replaceAll('__SEO_CANONICAL__', canonical) + .replaceAll('__SEO_IMAGE__', image) + .replaceAll('__SEO_JSON_LD__', escapeJsonScript(JSON.stringify(jsonLd))) + .replaceAll('__CSP_NONCE__', nonce); +} + +export function robotsTxt(baseUrl) { + return [ + 'User-agent: *', + 'Allow: /', + 'Disallow: /admin/', + 'Disallow: /admin-media/', + 'Disallow: /api/', + 'Disallow: /download/', + '', + `Sitemap: ${baseUrl}/sitemap.xml`, + '' + ].join('\n'); +} + +export function llmsTxt(baseUrl) { + return [ + '# The Meme Protocol', + '', + '> A live, moderated meme gallery. 
Uploads are square PNG/JPEG inputs normalized to metadata-stripped WebP, scored with MEME_CONSENSUS_SCORE, and published only after AI or admin approval.', + '', + 'Important URLs:', + `- Site: ${baseUrl}/`, + `- JSON feed: ${baseUrl}/feed.json`, + `- Sitemap: ${baseUrl}/sitemap.xml`, + `- Source: ${REPO_URL}`, + '', + 'Crawler guidance:', + '- Public approved meme images are available under /media/.', + '- Admin, review, upload, and API mutation routes are not public knowledge sources.', + '- The site is intentionally visual and sparse; use metadata, sitemap image entries, and JSON feed for machine summaries.', + '' + ].join('\n'); +} + +export function manifest(baseUrl) { + return { + name: SITE_NAME, + short_name: 'Meme Protocol', + description: SITE_DESCRIPTION, + start_url: '/', + scope: '/', + display: 'standalone', + background_color: '#050505', + theme_color: '#00ff41', + icons: [ + { + src: `${baseUrl}/assets/yoonect-logo.png`, + sizes: '1447x712', + type: 'image/png', + purpose: 'any' + } + ] + }; +} + +export function feedJson(baseUrl, memes) { + return { + version: 'https://jsonfeed.org/version/1.1', + title: SITE_NAME, + home_page_url: `${baseUrl}/`, + feed_url: `${baseUrl}/feed.json`, + description: SITE_DESCRIPTION, + items: memes.slice(0, 50).map((meme) => ({ + id: meme.id, + url: `${baseUrl}/media/${meme.id}`, + image: `${baseUrl}/media/${meme.id}`, + title: `Meme ${shortId(meme.id)}`, + content_text: `Approved meme with MEME_CONSENSUS_SCORE ${meme.moderationScore}/100.`, + date_published: meme.createdAt + })) + }; +} + +export function sitemapXml(baseUrl, memes) { + const images = memes.slice(0, 1000).map((meme) => [ + ' ', + ` ${xmlEscape(`${baseUrl}/media/${meme.id}`)}`, + ` ${xmlEscape(`Meme ${shortId(meme.id)} with MEME_CONSENSUS_SCORE ${meme.moderationScore}/100`)}`, + ' ' + ].join('\n')).join('\n'); + + return [ + '', + '', + ' ', + ` ${xmlEscape(`${baseUrl}/`)}`, + ' hourly', + ' 1.0', + images, + ' ', + '', + '' + ].join('\n'); +} + 
/**
 * Tells crawlers not to index, follow, or archive this response.
 *
 * @param {{ setHeader(name: string, value: string): void }} res - Response to mark.
 * @returns {void}
 */
export function noIndex(res) {
  res.setHeader('X-Robots-Tag', 'noindex, nofollow, noarchive');
}

/**
 * Normalizes a base URL by removing every trailing slash, so callers can
 * append `/path` without producing `//`.
 *
 * @param {unknown} value - Base URL; coerced with `String()`.
 * @returns {string} The value without trailing slashes.
 */
function cleanBase(value) {
  return String(value).replace(/\/+$/, '');
}

/**
 * Builds a compact display label from an id: `0x` + first four characters +
 * `...` + last four characters, upper-cased.
 *
 * @param {string} id - Identifier to abbreviate.
 * @returns {string} Label such as `0xABCD...EF12`.
 */
function shortId(id) {
  return `0x${id.slice(0, 4).toUpperCase()}...${id.slice(-4).toUpperCase()}`;
}

/**
 * Escapes text for use in HTML element content or double-quoted attributes.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param {unknown} value - Text to escape; coerced with `String()`.
 * @returns {string} Text with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeHtml(value) {
  return String(value)
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;');
}

/**
 * Escapes text for XML: everything `escapeHtml` handles plus the apostrophe,
 * covering all five predefined XML entities.
 *
 * @param {unknown} value - Text to escape; coerced with `String()`.
 * @returns {string} XML-safe text.
 */
function xmlEscape(value) {
  return escapeHtml(value).replaceAll("'", '&apos;');
}

/**
 * Makes serialized JSON safe to embed inside a `<script>` element by
 * rewriting `<`, `>` and `&` as `\uXXXX` escapes. The result still parses to
 * the same JSON value but can no longer contain `</script>` or `<!--`.
 *
 * @param {string} value - Output of `JSON.stringify`.
 * @returns {string} Script-safe JSON text.
 */
function escapeJsonScript(value) {
  return value
    .replaceAll('<', '\\u003c')
    .replaceAll('>', '\\u003e')
    .replaceAll('&', '\\u0026');
}