story-research-zapwall/lib/metadataExtractor.ts
2026-01-10 09:41:57 +01:00

752 lines
22 KiB
TypeScript

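/**
* Extract objects from metadata embedded in Nostr notes.
* The metadata JSON is read from the event's `json` tag when present; otherwise from a
* [Metadata JSON] section in the note content (zero-width-delimited or plain-text form).
*/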
import type { Event } from 'nostr-tools'
import { extractTagsFromEvent } from './nostrTagSystem'
import { generateAuthorHashId, generateSeriesHashId, generatePublicationHashId, generateReviewHashId, generatePurchaseHashId, generateReviewTipHashId, generateSponsoringHashId } from './hashIdGenerator'
/**
 * Author presentation extracted from a Nostr event.
 * Built by `extractAuthorFromEvent` from the event's metadata JSON.
 */
export interface ExtractedAuthor {
// Discriminant of the `ExtractedObject` union
type: 'author'
// Hash id returned by `generateAuthorHashId` for the author data
id: string
// Metadata `pubkey` when it is a string, otherwise the event's pubkey
pubkey: string
// Empty string when the metadata has no string `authorName`
authorName: string
// Empty string when the metadata has no string `presentation`
presentation: string
// Empty string when the metadata has no string `contentDescription`
contentDescription: string
// Omitted when the metadata has no non-empty `mainnetAddress`
mainnetAddress?: string
// Omitted when the metadata has no non-empty `pictureUrl`
pictureUrl?: string
// Metadata category, else tag category, else 'sciencefiction'
category: string
// Metadata `url`; omitted when missing or empty
url?: string
// Id of the Nostr event the object was extracted from
eventId: string
}
/**
 * Series extracted from a Nostr event.
 * Built by `extractSeriesFromEvent` from the metadata JSON, or from tags as a fallback.
 */
export interface ExtractedSeries {
// Discriminant of the `ExtractedObject` union
type: 'series'
// Hash id returned by `generateSeriesHashId` for the series data
id: string
// Metadata `pubkey` when it is a string, otherwise the event's pubkey
pubkey: string
title: string
description: string
// Metadata or tag preview, else the first 200 characters of the note content; omitted when empty
preview?: string
// Omitted when neither metadata nor tags provide a non-empty cover URL
coverUrl?: string
// Metadata category, else tag category, else 'sciencefiction'
category: string
// Id of the Nostr event the object was extracted from
eventId: string
}
/**
 * Publication extracted from a Nostr event.
 * Built by `extractPublicationFromEvent` from the metadata JSON, or from tags as a fallback.
 */
export interface ExtractedPublication {
  /** Discriminant of the `ExtractedObject` union. */
  type: 'publication'
  /** Hash id returned by `generatePublicationHashId` for the publication data. */
  id: string
  /** Metadata `pubkey` when it is a string, otherwise the event's pubkey. */
  pubkey: string
  title: string
  /** Metadata or tag preview, else the first 200 characters of the note content. */
  preview: string
  /** Metadata category, else tag category, else 'sciencefiction'. */
  category: string
  /** Omitted when neither metadata nor tags provide a non-empty series id. */
  seriesId?: string
  /** Omitted when neither metadata nor tags provide a non-empty banner URL. */
  bannerUrl?: string
  /** Metadata or tag zap amount; defaults to 800 when neither provides a number. */
  zapAmount: number
  /**
   * Pages taken from the metadata JSON `pages` array.
   * `buildExtractedPublication` attaches this property at runtime, so it is declared
   * here to make it reachable in a type-safe way. Absent when the metadata has no pages.
   */
  pages?: Array<{ number: number; type: 'markdown' | 'image'; content: string }>
  /** Id of the Nostr event the object was extracted from. */
  eventId: string
}
/**
 * Review extracted from a Nostr event whose tags declare type 'quote'.
 * Built by `extractReviewFromEvent` from the metadata JSON, or from tags as a fallback.
 */
export interface ExtractedReview {
// Discriminant of the `ExtractedObject` union
type: 'review'
// Hash id returned by `generateReviewHashId` for the review data
id: string
// Metadata `pubkey` when it is a string (metadata path), otherwise the event's pubkey
pubkey: string
// Id of the reviewed article, from metadata or tags
articleId: string
// From metadata or tags; the metadata path falls back to the event's pubkey
reviewerPubkey: string
// Metadata `content` when it is a string (metadata path), otherwise the event content
content: string
// Omitted when no non-empty title is available
title?: string
// Id of the Nostr event the object was extracted from
eventId: string
}
/**
 * Purchase extracted from a kind 9735 event whose `kind_type` tag is 'purchase'.
 * Built by `extractPurchaseFromEvent`.
 */
export interface ExtractedPurchase {
// Discriminant of the `ExtractedObject` union
type: 'purchase'
// Hash id returned by `generatePurchaseHashId` for the purchase data
id: string
// Set to the pubkey of the receipt event itself
payerPubkey: string
// Value of the first 'e' tag
articleId: string
// Value of the first 'p' tag
authorPubkey: string
// 'amount' tag value divided by 1000 (may be fractional)
amount: number
// 'payment_hash' tag value, or the event id when the tag is missing
paymentHash: string
// Id of the Nostr event the object was extracted from
eventId: string
}
/**
 * Review tip extracted from a kind 9735 event whose `kind_type` tag is 'review_tip'.
 * Built by `extractReviewTipFromEvent`.
 */
export interface ExtractedReviewTip {
// Discriminant of the `ExtractedObject` union
type: 'review_tip'
// Hash id returned by `generateReviewTipHashId` for the tip data
id: string
// Set to the pubkey of the receipt event itself
payerPubkey: string
// Value of the first 'e' tag
articleId: string
// Value of the first 'review_id' tag
reviewId: string
// Value of the first 'reviewer' tag
reviewerPubkey: string
// Value of the first 'p' tag
authorPubkey: string
// 'amount' tag value divided by 1000 (may be fractional)
amount: number
// 'payment_hash' tag value, or the event id when the tag is missing
paymentHash: string
// Id of the Nostr event the object was extracted from
eventId: string
}
/**
 * Sponsoring extracted from a kind 9735 event whose `kind_type` tag is 'sponsoring'.
 * Built by `extractSponsoringFromEvent`.
 */
export interface ExtractedSponsoring {
// Discriminant of the `ExtractedObject` union
type: 'sponsoring'
// Hash id returned by `generateSponsoringHashId`
id: string
// Set to the pubkey of the receipt event itself
payerPubkey: string
// Value of the first 'p' tag
authorPubkey: string
// Value of the first 'series' tag; omitted when missing or empty
seriesId?: string
// Value of the first 'article' tag, else the first 'e' tag; omitted when missing or empty
articleId?: string
// 'amount' tag value divided by 1000 (may be fractional)
amount: number
// 'payment_hash' tag value, or the event id when the tag is missing
paymentHash: string
// Id of the Nostr event the object was extracted from
eventId: string
}
/**
 * Union of every object kind this module can extract from an event.
 * Members are discriminated by their literal `type` field.
 */
export type ExtractedObject =
| ExtractedAuthor
| ExtractedSeries
| ExtractedPublication
| ExtractedReview
| ExtractedPurchase
| ExtractedReviewTip
| ExtractedSponsoring
/**
 * Read the metadata object stored in the event's first `json` tag.
 *
 * @param event - any object exposing Nostr-style `tags`
 * @returns the parsed object, or `null` when there is no `json` tag, its value is
 *   empty, the value is not valid JSON (the error is logged), or the parsed value
 *   is not a plain object
 */
function extractMetadataJsonFromTag(event: { tags: string[][] }): Record<string, unknown> | null {
  const payload = event.tags.find(([name]) => name === 'json')?.[1]
  if (!payload) {
    return null
  }
  try {
    const decoded: unknown = JSON.parse(payload)
    if (isRecord(decoded)) {
      return decoded
    }
    return null
  } catch (e) {
    console.error('Error parsing JSON metadata from tag:', e)
    return null
  }
}
/**
 * Read the metadata object from a `[Metadata JSON]` section at the end of the note content.
 *
 * Two legacy layouts are recognised, in this order:
 *  1. the "invisible" layout, where the marker and payload are wrapped in
 *     U+200B / U+200C characters;
 *  2. the plain layout, where the marker is followed directly by a newline and the JSON.
 *
 * If the invisible payload fails to parse, the error is logged and the plain layout is
 * still attempted. If it parses to something that is not a plain object, `null` is
 * returned immediately.
 *
 * @param content - full note content
 * @returns the parsed object, or `null` when no section is found or it is unusable
 */
function extractMetadataJson(content: string): Record<string, unknown> | null {
  const hiddenPayload = /[\u200B\u200C]\[Metadata JSON\][\u200B\u200C]\n[\u200B\u200C](.+)[\u200B\u200C]$/s.exec(content)?.[1]
  if (hiddenPayload) {
    try {
      // Zero-width and directional marks are stripped from the whole payload before parsing
      const stripped = hiddenPayload.replace(/[\u200B\u200C\u200D\u200E\u200F]/g, '').trim()
      const decoded: unknown = JSON.parse(stripped)
      return isRecord(decoded) ? decoded : null
    } catch (e) {
      // No return here: fall through and try the plain layout
      console.error('Error parsing metadata JSON from invisible content:', e)
    }
  }

  const visiblePayload = /\[Metadata JSON\]\n(.+)$/s.exec(content)?.[1]
  if (!visiblePayload) {
    return null
  }
  try {
    const decoded: unknown = JSON.parse(visiblePayload.trim())
    return isRecord(decoded) ? decoded : null
  } catch (e) {
    console.error('Error parsing metadata JSON from content:', e)
    return null
  }
}
/**
 * Type guard for non-null, non-array objects.
 *
 * @param value - value to test
 * @returns `true` when `value` is an object other than `null` or an array
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false
  }
  return typeof value === 'object'
}
/**
 * Build an `ExtractedAuthor` from an author presentation event.
 *
 * @param event - Nostr event to inspect
 * @returns the author object, or `null` unless both the event tags and the
 *   metadata JSON declare type 'author' (there is no tag-only fallback)
 */
export async function extractAuthorFromEvent(event: Event): Promise<ExtractedAuthor | null> {
  const tags = extractTagsFromEvent(event)
  if (tags.type !== 'author') {
    return null
  }
  const metadata = getMetadataFromEvent(event)
  if (metadata === null || metadata.type !== 'author') {
    // Author data is only available through the metadata JSON
    return null
  }
  const data = buildAuthorDataFromMetadata({ event, tags, metadata })
  const id = await generateAuthorHashId(data)
  return buildExtractedAuthor({ eventId: event.id, id, data, metadata })
}
/**
 * Assemble the author fields that feed the author hash id.
 *
 * String fields come from the metadata JSON; missing text fields default to ''.
 * `pubkey` falls back to the event's pubkey and `category` to the tag category,
 * then to 'sciencefiction'. Empty optional fields are left out.
 *
 * @param params - source event, its extracted tags and its metadata JSON
 * @returns the normalised author data
 */
function buildAuthorDataFromMetadata(params: {
  event: Event
  tags: ReturnType<typeof extractTagsFromEvent>
  metadata: Record<string, unknown>
}): {
  pubkey: string
  authorName: string
  presentation: string
  contentDescription: string
  mainnetAddress?: string
  pictureUrl?: string
  category: string
} {
  const { event, tags, metadata } = params

  // Collected separately so they keep their position before `category` in the result
  const optionalFields: { mainnetAddress?: string; pictureUrl?: string } = {}
  const mainnet = firstString(metadata.mainnetAddress)
  if (mainnet) {
    optionalFields.mainnetAddress = mainnet
  }
  const picture = firstString(metadata.pictureUrl)
  if (picture) {
    optionalFields.pictureUrl = picture
  }

  return {
    pubkey: firstString(metadata.pubkey, event.pubkey) ?? event.pubkey,
    authorName: firstString(metadata.authorName) ?? '',
    presentation: firstString(metadata.presentation) ?? '',
    contentDescription: firstString(metadata.contentDescription) ?? '',
    ...optionalFields,
    category: firstString(metadata.category, tags.category, 'sciencefiction') ?? 'sciencefiction',
  }
}
/**
 * Combine the hash id, the normalised author data and the source event id
 * into the final `ExtractedAuthor`.
 *
 * @param params - event id, hash id, author data and the raw metadata (read for `url`)
 * @returns the author object; empty optional fields are omitted
 */
function buildExtractedAuthor(params: {
  eventId: string
  id: string
  data: {
    pubkey: string
    authorName: string
    presentation: string
    contentDescription: string
    mainnetAddress?: string
    pictureUrl?: string
    category: string
  }
  metadata: Record<string, unknown>
}): ExtractedAuthor {
  const { data } = params
  const author: ExtractedAuthor = {
    type: 'author',
    id: params.id,
    pubkey: data.pubkey,
    authorName: data.authorName,
    presentation: data.presentation,
    contentDescription: data.contentDescription,
    category: data.category,
    eventId: params.eventId,
  }
  if (data.mainnetAddress) {
    author.mainnetAddress = data.mainnetAddress
  }
  if (data.pictureUrl) {
    author.pictureUrl = data.pictureUrl
  }
  // `url` is not part of the hashed data, so it is read straight from the metadata
  const url = firstString(params.metadata.url)
  if (url) {
    author.url = url
  }
  return author
}
/**
 * Pick the first argument that is a string (an empty string counts).
 *
 * @param values - candidates, in priority order
 * @returns the first string candidate, or `undefined` when there is none
 */
function firstString(...values: unknown[]): string | undefined {
  return values.find((candidate): candidate is string => typeof candidate === 'string')
}
/**
 * Build an `ExtractedSeries` from a series event.
 *
 * The metadata JSON is preferred. Without series metadata, the series is rebuilt
 * from tags, which requires a title and a description.
 *
 * @param event - Nostr event to inspect
 * @returns the series object, or `null` when the tags do not declare type 'series'
 *   or neither source provides enough data
 */
export async function extractSeriesFromEvent(event: Event): Promise<ExtractedSeries | null> {
  const tags = extractTagsFromEvent(event)
  if (tags.type !== 'series') {
    return null
  }
  const eventId = event.id

  const metadata = getMetadataFromEvent(event)
  if (metadata?.type === 'series') {
    const fromMetadata = buildSeriesDataFromMetadata({ event, tags, metadata })
    const metadataId = await generateSeriesHashId(fromMetadata)
    return buildExtractedSeries({ eventId, id: metadataId, data: fromMetadata })
  }

  // Tag-only fallback
  if (!tags.title || !tags.description) {
    return null
  }
  const fromTags: { pubkey: string; title: string; description: string; preview: string; coverUrl?: string; category: string } = {
    pubkey: event.pubkey,
    title: tags.title,
    description: tags.description,
    preview: (tags.preview as string) ?? event.content.substring(0, 200),
    category: tags.category ?? 'sciencefiction',
  }
  if (tags.coverUrl) {
    fromTags.coverUrl = tags.coverUrl
  }
  const tagId = await generateSeriesHashId(fromTags)
  return buildExtractedSeries({ eventId, id: tagId, data: fromTags })
}
/**
 * Build an `ExtractedPublication` from a publication event.
 *
 * The metadata JSON is preferred and is the only source of `pages`. Without
 * publication metadata, the publication is rebuilt from tags, which requires a title.
 *
 * @param event - Nostr event to inspect
 * @returns the publication object, or `null` when the tags do not declare type
 *   'publication' or neither source provides enough data
 */
export async function extractPublicationFromEvent(event: Event): Promise<ExtractedPublication | null> {
  const tags = extractTagsFromEvent(event)
  if (tags.type !== 'publication') {
    return null
  }
  const eventId = event.id

  const metadata = getMetadataFromEvent(event)
  if (metadata?.type === 'publication') {
    const fromMetadata = buildPublicationDataFromMetadata({ event, tags, metadata })
    const metadataId = await generatePublicationHashId(fromMetadata)
    return buildExtractedPublication({
      eventId,
      id: metadataId,
      data: fromMetadata,
      pages: readPublicationPages(metadata),
    })
  }

  // Tag-only fallback: pages are never available here
  if (!tags.title) {
    return null
  }
  const fromTags: {
    pubkey: string
    title: string
    preview: string
    category: string
    seriesId?: string
    bannerUrl?: string
    zapAmount: number
  } = {
    pubkey: event.pubkey,
    title: tags.title,
    preview: (tags.preview as string) ?? event.content.substring(0, 200),
    category: tags.category ?? 'sciencefiction',
    zapAmount: tags.zapAmount ?? 800,
  }
  if (tags.seriesId) {
    fromTags.seriesId = tags.seriesId
  }
  if (tags.bannerUrl) {
    fromTags.bannerUrl = tags.bannerUrl
  }
  const tagId = await generatePublicationHashId(fromTags)
  return buildExtractedPublication({ eventId, id: tagId, data: fromTags, pages: undefined })
}
/**
 * Build an `ExtractedReview` from an event whose tags declare type 'quote'.
 *
 * @param event - Nostr event to inspect
 * @returns the review built from the metadata JSON when possible, otherwise the
 *   one built from tags; `null` when the event is not a 'quote' or neither works
 */
export async function extractReviewFromEvent(event: Event): Promise<ExtractedReview | null> {
  const tags = extractTagsFromEvent(event)
  if (tags.type !== 'quote') {
    return null
  }
  const viaMetadata = await extractReviewFromMetadata({
    event,
    tags,
    metadata: getMetadataFromEvent(event),
  })
  return viaMetadata !== null ? viaMetadata : extractReviewFromTags({ event, tags })
}
/**
 * Build a review from the metadata JSON.
 *
 * @param params - source event, its extracted tags and its metadata (may be `null`)
 * @returns the review, or `null` when the metadata is missing, is not of type
 *   'review', or lacks the required review fields
 */
async function extractReviewFromMetadata(params: {
  event: Event
  tags: ReturnType<typeof extractTagsFromEvent>
  metadata: Record<string, unknown> | null
}): Promise<ExtractedReview | null> {
  const { event, tags, metadata } = params
  if (metadata === null || metadata.type !== 'review') {
    return null
  }
  const reviewData = buildReviewDataFromMetadata({ event, tags, metadata })
  if (reviewData === null) {
    return null
  }
  const id = await generateReviewHashId(reviewData)
  return { type: 'review', id, ...reviewData, eventId: event.id }
}
/**
 * Build a review from tags alone, using the event content as the review text.
 *
 * @param params - source event and its extracted tags
 * @returns the review, or `null` when the tags lack an article id or a reviewer pubkey
 */
async function extractReviewFromTags(params: {
  event: Event
  tags: ReturnType<typeof extractTagsFromEvent>
}): Promise<ExtractedReview | null> {
  const { event, tags } = params
  if (!tags.articleId || !tags.reviewerPubkey) {
    return null
  }
  return buildExtractedReviewFromTags({
    pubkey: event.pubkey,
    articleId: tags.articleId,
    reviewerPubkey: tags.reviewerPubkey,
    content: event.content,
    ...(tags.title ? { title: tags.title } : {}),
    eventId: event.id,
  })
}
/**
 * Compute the review hash id and assemble the final `ExtractedReview`.
 *
 * @param params - review fields plus the source event id; `eventId` is not hashed
 * @returns the review object; `title` is omitted when empty
 */
async function buildExtractedReviewFromTags(params: {
  pubkey: string
  articleId: string
  reviewerPubkey: string
  content: string
  title?: string
  eventId: string
}): Promise<ExtractedReview> {
  const hashInput: { pubkey: string; articleId: string; reviewerPubkey: string; content: string; title?: string } = {
    pubkey: params.pubkey,
    articleId: params.articleId,
    reviewerPubkey: params.reviewerPubkey,
    content: params.content,
  }
  if (params.title) {
    hashInput.title = params.title
  }
  const id = await generateReviewHashId(hashInput)

  const review: ExtractedReview = {
    type: 'review',
    id,
    pubkey: params.pubkey,
    articleId: params.articleId,
    reviewerPubkey: params.reviewerPubkey,
    content: params.content,
    eventId: params.eventId,
  }
  if (params.title) {
    review.title = params.title
  }
  return review
}
/**
 * Locate the metadata JSON of an event.
 *
 * @param event - Nostr event to inspect
 * @returns the metadata from the `json` tag when that yields an object, otherwise
 *   whatever the content-embedded `[Metadata JSON]` section yields (possibly `null`)
 */
function getMetadataFromEvent(event: Event): Record<string, unknown> | null {
  const fromTag = extractMetadataJsonFromTag(event)
  if (fromTag !== null) {
    return fromTag
  }
  return extractMetadataJson(event.content)
}
/**
 * Assemble the series fields that feed the series hash id.
 *
 * Metadata values win over tag values. `title` and `description` default to '',
 * `preview` to the first 200 characters of the note content, `pubkey` to the
 * event's pubkey and `category` to 'sciencefiction'. An empty cover URL is left out.
 *
 * @param params - source event, its extracted tags and its metadata JSON
 * @returns the normalised series data
 */
function buildSeriesDataFromMetadata(params: {
  event: Event
  tags: ReturnType<typeof extractTagsFromEvent>
  metadata: Record<string, unknown>
}): { pubkey: string; title: string; description: string; preview: string; coverUrl?: string; category: string } {
  const { event, tags, metadata } = params
  const coverUrl = firstString(metadata.coverUrl, tags.coverUrl)
  return {
    pubkey: firstString(metadata.pubkey, event.pubkey) ?? event.pubkey,
    title: firstString(metadata.title, tags.title) ?? '',
    description: firstString(metadata.description) ?? '',
    preview: firstString(metadata.preview, tags.preview) ?? event.content.substring(0, 200),
    category: firstString(metadata.category, tags.category, 'sciencefiction') ?? 'sciencefiction',
    ...(coverUrl ? { coverUrl } : {}),
  }
}
/**
 * Combine the hash id, the normalised series data and the source event id
 * into the final `ExtractedSeries`.
 *
 * @param params - event id, hash id and series data
 * @returns the series object; empty `coverUrl` and `preview` are omitted
 */
function buildExtractedSeries(params: {
  eventId: string
  id: string
  data: { pubkey: string; title: string; description: string; preview: string; coverUrl?: string; category: string }
}): ExtractedSeries {
  const { data } = params
  const series: ExtractedSeries = {
    type: 'series',
    id: params.id,
    pubkey: data.pubkey,
    title: data.title,
    description: data.description,
    category: data.category,
    eventId: params.eventId,
  }
  if (data.coverUrl) {
    series.coverUrl = data.coverUrl
  }
  if (data.preview) {
    series.preview = data.preview
  }
  return series
}
/**
 * Assemble the publication fields that feed the publication hash id.
 *
 * Metadata values win over tag values. `title` defaults to '', `preview` to the
 * first 200 characters of the note content, `pubkey` to the event's pubkey,
 * `category` to 'sciencefiction' and `zapAmount` to 800. Empty `seriesId` and
 * `bannerUrl` are left out.
 *
 * @param params - source event, its extracted tags and its metadata JSON
 * @returns the normalised publication data
 */
function buildPublicationDataFromMetadata(params: {
  event: Event
  tags: ReturnType<typeof extractTagsFromEvent>
  metadata: Record<string, unknown>
}): {
  pubkey: string
  title: string
  preview: string
  category: string
  seriesId?: string
  bannerUrl?: string
  zapAmount: number
} {
  const { event, tags, metadata } = params
  const seriesId = firstString(metadata.seriesId, tags.seriesId)
  const bannerUrl = firstString(metadata.bannerUrl, tags.bannerUrl)
  return {
    pubkey: firstString(metadata.pubkey, event.pubkey) ?? event.pubkey,
    title: firstString(metadata.title, tags.title) ?? '',
    preview: firstString(metadata.preview, tags.preview) ?? event.content.substring(0, 200),
    category: firstString(metadata.category, tags.category, 'sciencefiction') ?? 'sciencefiction',
    zapAmount: firstNumber(metadata.zapAmount, tags.zapAmount) ?? 800,
    ...(seriesId ? { seriesId } : {}),
    ...(bannerUrl ? { bannerUrl } : {}),
  }
}
/**
 * Read the `pages` array of a publication's metadata JSON.
 *
 * The metadata comes from parsed event JSON, so each entry is validated instead of
 * being cast: only entries with a numeric `number`, a `type` of 'markdown' or
 * 'image' and a string `content` are kept.
 *
 * @param metadata - parsed metadata JSON of the publication
 * @returns the well-formed pages, or `undefined` when `pages` is missing, is not
 *   an array, or contains no well-formed entry
 */
function readPublicationPages(metadata: Record<string, unknown>): Array<{ number: number; type: 'markdown' | 'image'; content: string }> | undefined {
  type Page = { number: number; type: 'markdown' | 'image'; content: string }

  const raw = metadata.pages
  if (!Array.isArray(raw)) {
    return undefined
  }
  const pages = raw.filter((entry: unknown): entry is Page => {
    if (typeof entry !== 'object' || entry === null) {
      return false
    }
    const candidate = entry as Record<string, unknown>
    return (
      typeof candidate.number === 'number' &&
      (candidate.type === 'markdown' || candidate.type === 'image') &&
      typeof candidate.content === 'string'
    )
  })
  return pages.length > 0 ? pages : undefined
}
/**
 * Combine the hash id, the normalised publication data, the optional pages and
 * the source event id into the final publication object.
 *
 * @param params - event id, hash id, publication data and pages (or `undefined`)
 * @returns the publication object; empty `seriesId` / `bannerUrl` and missing
 *   `pages` are omitted
 */
function buildExtractedPublication(params: {
  eventId: string
  id: string
  data: {
    pubkey: string
    title: string
    preview: string
    category: string
    seriesId?: string
    bannerUrl?: string
    zapAmount: number
  }
  pages: Array<{ number: number; type: 'markdown' | 'image'; content: string }> | undefined
}): ExtractedPublication {
  const { data, pages } = params

  // Optional properties are collected first, then appended after the required ones
  const extras: {
    seriesId?: string
    bannerUrl?: string
    pages?: Array<{ number: number; type: 'markdown' | 'image'; content: string }>
  } = {}
  if (data.seriesId) {
    extras.seriesId = data.seriesId
  }
  if (data.bannerUrl) {
    extras.bannerUrl = data.bannerUrl
  }
  if (pages) {
    extras.pages = pages
  }

  return {
    type: 'publication',
    id: params.id,
    pubkey: data.pubkey,
    title: data.title,
    preview: data.preview,
    category: data.category,
    zapAmount: data.zapAmount,
    eventId: params.eventId,
    ...extras,
  }
}
/**
 * Assemble the review fields that feed the review hash id.
 *
 * Metadata values win over tag values. `reviewerPubkey` and `pubkey` fall back
 * to the event's pubkey, and `content` to the event content.
 *
 * @param params - source event, its extracted tags and its metadata JSON
 * @returns the normalised review data, or `null` when no article id (or an empty
 *   reviewer pubkey) is found; `title` is omitted when empty
 */
function buildReviewDataFromMetadata(params: {
  event: Event
  tags: ReturnType<typeof extractTagsFromEvent>
  metadata: Record<string, unknown>
}): { pubkey: string; articleId: string; reviewerPubkey: string; content: string; title?: string } | null {
  const { event, tags, metadata } = params
  const articleId = firstString(metadata.articleId, tags.articleId) ?? ''
  const reviewerPubkey = firstString(metadata.reviewerPubkey, tags.reviewerPubkey, event.pubkey) ?? event.pubkey
  if (!articleId || !reviewerPubkey) {
    return null
  }
  const title = firstString(metadata.title, tags.title)
  return {
    pubkey: firstString(metadata.pubkey, event.pubkey) ?? event.pubkey,
    articleId,
    reviewerPubkey,
    content: firstString(metadata.content, event.content) ?? event.content,
    ...(title ? { title } : {}),
  }
}
/**
 * Pick the first argument that is a usable number.
 *
 * `NaN` is skipped even though `typeof NaN === 'number'`: callers chain this with
 * `?? default`, and a returned `NaN` would bypass that default.
 *
 * @param values - candidates, in priority order
 * @returns the first non-NaN number, or `undefined` when there is none
 */
function firstNumber(...values: unknown[]): number | undefined {
  for (const value of values) {
    if (typeof value === 'number' && !Number.isNaN(value)) {
      return value
    }
  }
  return undefined
}
/**
 * Build an `ExtractedPurchase` from a kind 9735 event whose `kind_type` tag is 'purchase'.
 *
 * @param event - Nostr event to inspect
 * @returns the purchase, or `null` when the event is not a purchase receipt or
 *   lacks a 'p' tag, an 'e' tag or a parseable 'amount' tag
 */
export async function extractPurchaseFromEvent(event: Event): Promise<ExtractedPurchase | null> {
  if (readZapReceiptKind(event) !== 'purchase') {
    return null
  }
  const authorPubkey = readTagValue(event, 'p')
  const articleId = readTagValue(event, 'e')
  if (!authorPubkey || !articleId) {
    return null
  }
  const amount = readAmountSats(event)
  if (amount === undefined) {
    return null
  }
  const purchase = {
    payerPubkey: event.pubkey,
    articleId,
    authorPubkey,
    amount,
    paymentHash: readPaymentHash(event),
  }
  const id = await generatePurchaseHashId(purchase)
  return { type: 'purchase', id, ...purchase, eventId: event.id }
}
/**
 * Build an `ExtractedReviewTip` from a kind 9735 event whose `kind_type` tag is 'review_tip'.
 *
 * @param event - Nostr event to inspect
 * @returns the review tip, or `null` when the event is not a review-tip receipt or
 *   lacks a 'p', 'e', 'review_id' or 'reviewer' tag, or a parseable 'amount' tag
 */
export async function extractReviewTipFromEvent(event: Event): Promise<ExtractedReviewTip | null> {
  if (readZapReceiptKind(event) !== 'review_tip') {
    return null
  }
  const authorPubkey = readTagValue(event, 'p')
  const articleId = readTagValue(event, 'e')
  const reviewId = readTagValue(event, 'review_id')
  const reviewerPubkey = readTagValue(event, 'reviewer')
  if (!authorPubkey || !articleId || !reviewId || !reviewerPubkey) {
    return null
  }
  const amount = readAmountSats(event)
  if (amount === undefined) {
    return null
  }
  const tip = {
    payerPubkey: event.pubkey,
    articleId,
    reviewId,
    reviewerPubkey,
    authorPubkey,
    amount,
    paymentHash: readPaymentHash(event),
  }
  const id = await generateReviewTipHashId(tip)
  return { type: 'review_tip', id, ...tip, eventId: event.id }
}
/**
 * Build an `ExtractedSponsoring` from a kind 9735 event whose `kind_type` tag is 'sponsoring'.
 *
 * @param event - Nostr event to inspect
 * @returns the sponsoring, or `null` when the event is not a sponsoring receipt or
 *   lacks a 'p' tag or a parseable 'amount' tag
 */
export async function extractSponsoringFromEvent(event: Event): Promise<ExtractedSponsoring | null> {
  if (readZapReceiptKind(event) !== 'sponsoring') {
    return null
  }
  const authorPubkey = readTagValue(event, 'p')
  if (!authorPubkey) {
    return null
  }
  const amountSats = readAmountSats(event)
  if (amountSats === undefined) {
    return null
  }
  const sponsoringData = buildSponsoringData({ event, authorPubkey, amountSats })
  const hashInput = buildSponsoringHashInput(sponsoringData)
  const id = await generateSponsoringHashId(hashInput)
  return buildExtractedSponsoring({ id, eventId: event.id, sponsoringData })
}
/**
 * Gather the sponsoring fields from the receipt event.
 *
 * `seriesId` comes from the 'series' tag; `articleId` from the 'article' tag,
 * else from the 'e' tag. Both keys are always present and may be `undefined`.
 *
 * @param params - receipt event plus the already validated author pubkey and amount
 * @returns the raw sponsoring data
 */
function buildSponsoringData(params: { event: Event; authorPubkey: string; amountSats: number }): {
  payerPubkey: string
  authorPubkey: string
  seriesId: string | undefined
  articleId: string | undefined
  amount: number
  paymentHash: string
} {
  const { event, authorPubkey, amountSats } = params
  const seriesId = readTagValue(event, 'series')
  const articleTag = readTagValue(event, 'article')
  const eTag = readTagValue(event, 'e')
  return {
    payerPubkey: event.pubkey,
    authorPubkey,
    seriesId,
    articleId: resolveSponsoringArticleId({ articleTag, eTag }),
    amount: amountSats,
    paymentHash: readPaymentHash(event),
  }
}
/**
 * Shape the sponsoring data into the argument expected by `generateSponsoringHashId`.
 *
 * @param params - raw sponsoring data
 * @returns the hash input; `seriesId` and `articleId` are left out when undefined or empty
 */
function buildSponsoringHashInput(params: {
  payerPubkey: string
  authorPubkey: string
  seriesId: string | undefined
  articleId: string | undefined
  amount: number
  paymentHash: string
}): Parameters<typeof generateSponsoringHashId>[0] {
  const hashInput: {
    payerPubkey: string
    authorPubkey: string
    amount: number
    paymentHash: string
    seriesId?: string
    articleId?: string
  } = {
    payerPubkey: params.payerPubkey,
    authorPubkey: params.authorPubkey,
    amount: params.amount,
    paymentHash: params.paymentHash,
  }
  if (params.seriesId) {
    hashInput.seriesId = params.seriesId
  }
  if (params.articleId) {
    hashInput.articleId = params.articleId
  }
  return hashInput
}
/**
 * Combine the hash id, the sponsoring data and the source event id into the
 * final `ExtractedSponsoring`.
 *
 * @param params - hash id, event id and raw sponsoring data
 * @returns the sponsoring object; `seriesId` and `articleId` are omitted when undefined or empty
 */
function buildExtractedSponsoring(params: {
  id: string
  eventId: string
  sponsoringData: {
    payerPubkey: string
    authorPubkey: string
    seriesId: string | undefined
    articleId: string | undefined
    amount: number
    paymentHash: string
  }
}): ExtractedSponsoring {
  const { sponsoringData: data } = params
  const sponsoring: ExtractedSponsoring = {
    type: 'sponsoring',
    id: params.id,
    payerPubkey: data.payerPubkey,
    authorPubkey: data.authorPubkey,
    amount: data.amount,
    paymentHash: data.paymentHash,
    eventId: params.eventId,
  }
  if (data.seriesId) {
    sponsoring.seriesId = data.seriesId
  }
  if (data.articleId) {
    sponsoring.articleId = data.articleId
  }
  return sponsoring
}
/**
 * Look up the value (second element) of the first tag whose name matches `key`.
 *
 * @param event - Nostr event whose tags are searched
 * @param key - tag name to look for
 * @returns the tag value, or `undefined` when no such tag exists or it has no value
 */
function readTagValue(event: Event, key: string): string | undefined {
  for (const tag of event.tags) {
    if (tag[0] === key) {
      return tag[1]
    }
  }
  return undefined
}
/**
 * Read the `kind_type` tag of a kind 9735 event.
 *
 * @param event - Nostr event to inspect
 * @returns the `kind_type` tag value, or `undefined` when the event is not
 *   kind 9735 or has no such tag
 */
function readZapReceiptKind(event: Event): string | undefined {
  return event.kind === 9735 ? readTagValue(event, 'kind_type') : undefined
}
/**
 * Read the 'amount' tag and convert it from millisats to sats.
 *
 * The tag is parsed as a base-10 integer and divided by 1000, so the result can
 * be fractional.
 *
 * @param event - Nostr event to inspect
 * @returns the amount in sats, or `undefined` when the tag is missing, empty or
 *   does not start with an integer
 */
function readAmountSats(event: Event): number | undefined {
  const raw = readTagValue(event, 'amount')
  if (!raw) {
    return undefined
  }
  const msats = Number.parseInt(raw, 10)
  return Number.isNaN(msats) ? undefined : msats / 1000
}
/**
 * Read the payment hash of a receipt event.
 *
 * @param event - Nostr event to inspect
 * @returns the 'payment_hash' tag value, or the event id when the tag is absent
 */
function readPaymentHash(event: Event): string {
  const taggedHash = readTagValue(event, 'payment_hash')
  return taggedHash ?? event.id
}
/**
 * Choose the article id of a sponsoring.
 *
 * @param params - values of the 'article' tag and of the 'e' tag
 * @returns the 'article' value when defined (even if empty), otherwise the 'e' value
 */
function resolveSponsoringArticleId(params: { articleTag: string | undefined; eTag: string | undefined }): string | undefined {
  const { articleTag, eTag } = params
  return articleTag ?? eTag
}
/**
 * Run every extractor of this module against one event.
 *
 * Extractors run one after another in a fixed order: author, series, publication,
 * review, purchase, review tip, sponsoring.
 *
 * @param event - Nostr event to inspect
 * @returns the non-null extraction results, in extractor order
 */
export async function extractObjectsFromEvent(event: Event): Promise<ExtractedObject[]> {
  const extractors: Array<(source: Event) => Promise<ExtractedObject | null>> = [
    extractAuthorFromEvent,
    extractSeriesFromEvent,
    extractPublicationFromEvent,
    extractReviewFromEvent,
    extractPurchaseFromEvent,
    extractReviewTipFromEvent,
    extractSponsoringFromEvent,
  ]
  const collected: ExtractedObject[] = []
  for (const extract of extractors) {
    // Awaited one at a time to keep the sequential execution order
    const found = await extract(event)
    if (found) {
      collected.push(found)
    }
  }
  return collected
}