From 8ba94c1481b8451f2b19838073d8058b56ec9575 Mon Sep 17 00:00:00 2001
From: Daniel Ringel <33063149+dringel@users.noreply.github.com>
Date: Sat, 3 Jan 2026 17:14:34 +0100
Subject: [PATCH] Add files via upload

---
 hooks/use-mobile.ts            |  28 ++++
 lib/moderation.ts              | 113 +++++++++++++
 lib/pinecone.ts                |  41 +++++
 lib/rehype-single-char-link.ts |  44 +++++
 lib/sources.ts                 | 239 ++++++++++++++++++++++++++
 lib/utils.ts                   |   7 +
 6 files changed, 472 insertions(+)
 create mode 100644 hooks/use-mobile.ts
 create mode 100644 lib/moderation.ts
 create mode 100644 lib/pinecone.ts
 create mode 100644 lib/rehype-single-char-link.ts
 create mode 100644 lib/sources.ts
 create mode 100644 lib/utils.ts

diff --git a/hooks/use-mobile.ts b/hooks/use-mobile.ts
new file mode 100644
index 0000000..2b0fe1d
--- /dev/null
+++ b/hooks/use-mobile.ts
@@ -0,0 +1,28 @@
+import * as React from "react"
+
+/** Viewports narrower than this many CSS pixels count as mobile. */
+const MOBILE_BREAKPOINT = 768
+
+/**
+ * Reports whether the viewport is narrower than MOBILE_BREAKPOINT.
+ *
+ * The state starts as `undefined` and is first measured in an effect, so the
+ * initial render always returns `false`.
+ */
+export function useIsMobile() {
+  const [isMobile, setIsMobile] = React.useState<boolean | undefined>(undefined)
+
+  React.useEffect(() => {
+    const mql = window.matchMedia(`(max-width: ${MOBILE_BREAKPOINT - 1}px)`)
+    const onChange = () => {
+      setIsMobile(window.innerWidth < MOBILE_BREAKPOINT)
+    }
+    mql.addEventListener("change", onChange)
+    // Measure once on mount; the listener only reports later changes.
+    setIsMobile(window.innerWidth < MOBILE_BREAKPOINT)
+    return () => mql.removeEventListener("change", onChange)
+  }, [])
+
+  // `!!` maps the initial `undefined` to `false`.
+  return !!isMobile
+}
diff --git a/lib/moderation.ts b/lib/moderation.ts
new file mode 100644
index 0000000..555b834
--- /dev/null
+++ b/lib/moderation.ts
@@ -0,0 +1,113 @@
+import OpenAI from 'openai';
+import {
+  MODERATION_DENIAL_MESSAGE_SEXUAL,
+  MODERATION_DENIAL_MESSAGE_SEXUAL_MINORS,
+  MODERATION_DENIAL_MESSAGE_HARASSMENT,
+  MODERATION_DENIAL_MESSAGE_HARASSMENT_THREATENING,
+  MODERATION_DENIAL_MESSAGE_HATE,
+  MODERATION_DENIAL_MESSAGE_HATE_THREATENING,
+  MODERATION_DENIAL_MESSAGE_ILLICIT,
+  MODERATION_DENIAL_MESSAGE_ILLICIT_VIOLENT,
+  MODERATION_DENIAL_MESSAGE_SELF_HARM,
+  MODERATION_DENIAL_MESSAGE_SELF_HARM_INTENT,
+  MODERATION_DENIAL_MESSAGE_SELF_HARM_INSTRUCTIONS,
+  MODERATION_DENIAL_MESSAGE_VIOLENCE,
+  MODERATION_DENIAL_MESSAGE_VIOLENCE_GRAPHIC,
+  MODERATION_DENIAL_MESSAGE_DEFAULT,
+} from '@/config';
+
+/** Outcome of a moderation check on one piece of text. */
+export interface ModerationResult {
+  flagged: boolean;
+  denialMessage?: string;
+  category?: string;
+}
+
+/** Denial message for each moderation category name. */
+const CATEGORY_DENIAL_MESSAGES: Record<string, string> = {
+  'sexual': MODERATION_DENIAL_MESSAGE_SEXUAL,
+  'sexual/minors': MODERATION_DENIAL_MESSAGE_SEXUAL_MINORS,
+  'harassment': MODERATION_DENIAL_MESSAGE_HARASSMENT,
+  'harassment/threatening': MODERATION_DENIAL_MESSAGE_HARASSMENT_THREATENING,
+  'hate': MODERATION_DENIAL_MESSAGE_HATE,
+  'hate/threatening': MODERATION_DENIAL_MESSAGE_HATE_THREATENING,
+  'illicit': MODERATION_DENIAL_MESSAGE_ILLICIT,
+  'illicit/violent': MODERATION_DENIAL_MESSAGE_ILLICIT_VIOLENT,
+  'self-harm': MODERATION_DENIAL_MESSAGE_SELF_HARM,
+  'self-harm/intent': MODERATION_DENIAL_MESSAGE_SELF_HARM_INTENT,
+  'self-harm/instructions': MODERATION_DENIAL_MESSAGE_SELF_HARM_INSTRUCTIONS,
+  'violence': MODERATION_DENIAL_MESSAGE_VIOLENCE,
+  'violence/graphic': MODERATION_DENIAL_MESSAGE_VIOLENCE_GRAPHIC,
+};
+
+/**
+ * Order in which categories are tested once content is flagged. Each
+ * sub-category is listed before its parent so it is reported in preference.
+ */
+const CATEGORY_CHECK_ORDER: string[] = [
+  'sexual/minors',
+  'sexual',
+  'harassment/threatening',
+  'harassment',
+  'hate/threatening',
+  'hate',
+  'illicit/violent',
+  'illicit',
+  'self-harm/instructions',
+  'self-harm/intent',
+  'self-harm',
+  'violence/graphic',
+  'violence',
+];
+
+/**
+ * Runs `text` through the OpenAI moderation endpoint.
+ *
+ * Empty or whitespace-only text is reported as not flagged without an API
+ * call. For flagged text, the first flagged category in CATEGORY_CHECK_ORDER
+ * selects the denial message; if none matches, the default message is used.
+ *
+ * The check fails open: if the moderation request throws, the error is
+ * logged and the text is reported as not flagged.
+ */
+export async function isContentFlagged(text: string): Promise<ModerationResult> {
+  if (!text || text.trim().length === 0) {
+    return { flagged: false };
+  }
+
+  const openai = new OpenAI({
+    apiKey: process.env.OPENAI_API_KEY,
+  });
+
+  try {
+    const moderationResult = await openai.moderations.create({
+      input: text,
+    });
+
+    const result = moderationResult.results[0];
+    if (!result?.flagged) {
+      return { flagged: false };
+    }
+
+    const categories = result.categories;
+    for (const category of CATEGORY_CHECK_ORDER) {
+      if (categories[category as keyof typeof categories] === true) {
+        return {
+          flagged: true,
+          category,
+          denialMessage: CATEGORY_DENIAL_MESSAGES[category] || MODERATION_DENIAL_MESSAGE_DEFAULT,
+        };
+      }
+    }
+
+    return {
+      flagged: true,
+      denialMessage: MODERATION_DENIAL_MESSAGE_DEFAULT,
+    };
+  } catch (error) {
+    // Still fail open, but leave a trace instead of discarding the error.
+    console.error('isContentFlagged - moderation request failed:', error);
+    return { flagged: false };
+  }
+}
+
diff --git a/lib/pinecone.ts b/lib/pinecone.ts
new file mode 100644
index 0000000..b6b62a5
--- /dev/null
+++ b/lib/pinecone.ts
@@ -0,0 +1,41 @@
+import { Pinecone } from '@pinecone-database/pinecone';
+import { PINECONE_INDEX_NAME, PINECONE_TOP_K } from '@/config';
+import { searchResultsToChunks, getSourcesFromChunks, getContextFromSources } from '@/lib/sources';
+
+// Fail at import time so a missing key surfaces before any query is made.
+if (!process.env.PINECONE_API_KEY) {
+  throw new Error('PINECONE_API_KEY is not set');
+}
+
+export const pinecone = new Pinecone({
+  apiKey: process.env.PINECONE_API_KEY,
+});
+
+export const pineconeIndex = pinecone.Index(PINECONE_INDEX_NAME);
+
+/**
+ * Searches the 'default' namespace for records matching `query` and returns
+ * the hits as source-grouped context text.
+ *
+ * NOTE(review): searchResultsToChunks also reads `source_name`, which is not
+ * in the requested `fields` list - confirm against the index schema.
+ */
+export async function searchPinecone(
+  query: string,
+): Promise<string> {
+  const results = await pineconeIndex.namespace('default').searchRecords({
+    query: {
+      inputs: {
+        text: query,
+      },
+      topK: PINECONE_TOP_K,
+    },
+    fields: ['text', 'pre_context', 'post_context', 'source_url', 'source_description', 'source_type', 'order'],
+  });
+
+  const chunks = searchResultsToChunks(results);
+  const sources = getSourcesFromChunks(chunks);
+  const context = getContextFromSources(sources);
+  // NOTE(review): the wrapper has no closing marker - confirm this is intended.
+  return `< results > ${context} `;
+}
\ No newline at end of file
diff --git a/lib/rehype-single-char-link.ts b/lib/rehype-single-char-link.ts
new file mode 100644
index 0000000..10b34e1
--- /dev/null
+++ b/lib/rehype-single-char-link.ts
@@ -0,0 +1,44 @@
+import { visit } from "unist-util-visit";
+import type { Root } from "hast";
+
+/**
+ * Rehype plugin that adds the "single-char-link" class to every `a` element
+ * whose trimmed text content is exactly one character long.
+ */
+export function rehypeSingleCharLink() {
+  return (tree: Root) => {
+    visit(tree, "element", (node) => {
+      if (node.tagName === "a") {
+        const textContent = extractTextContent(node);
+        const trimmedText = textContent.trim();
+        // Count code points rather than UTF-16 units so that one astral
+        // character (e.g. an emoji) is still treated as a single character.
+        if (Array.from(trimmedText).length === 1) {
+          node.properties = node.properties || {};
+          const existingClass = Array.isArray(node.properties.className)
+            ? node.properties.className.filter((c): c is string => typeof c === "string")
+            : typeof node.properties.className === "string"
+              ? [node.properties.className]
+              : [];
+
+          node.properties.className = [...existingClass, "single-char-link"];
+        }
+      }
+    });
+  };
+}
+
+/** Concatenates the values of all text nodes at or below `node`. */
+function extractTextContent(node: any): string {
+  if (node.type === "text") {
+    return node.value || "";
+  }
+
+  if (node.children && Array.isArray(node.children)) {
+    return node.children
+      .map((child: any) => extractTextContent(child))
+      .join("");
+  }
+
+  return "";
+}
\ No newline at end of file
diff --git a/lib/sources.ts b/lib/sources.ts
new file mode 100644
index 0000000..5dcb164
--- /dev/null
+++ b/lib/sources.ts
@@ -0,0 +1,239 @@
+import { Chunk, Source, Citation, chunkSchema, citationSchema } from "@/types/data";
+
+/** Builds the key identifying a source: its URL and description joined by "|||". */
+export function getSourceKey(source_url: string, source_description: string): string {
+  return `${source_url}|||${source_description}`;
+}
+
+/** Returns the source key of the source `chunk` belongs to. */
+function getChunkSourceKey(chunk: Chunk): string {
+  return getSourceKey(chunk.source_url, chunk.source_description);
+}
+
+/**
+ * Groups chunks into sources by source key. Sources appear in the order their
+ * first chunk was seen, and each source keeps its chunks in input order.
+ */
+export function aggregateSourcesFromChunks(chunks: Chunk[]): Source[] {
+  const sourceMap = new Map<string, Source>();
+
+  chunks.forEach((chunk) => {
+    const key = getChunkSourceKey(chunk);
+    if (!sourceMap.has(key)) {
+      sourceMap.set(key, {
+        chunks: [],
+        source_url: chunk.source_url,
+        source_description: chunk.source_description,
+        source_name: chunk.source_name,
+      });
+    }
+
+    sourceMap.get(key)!.chunks.push(chunk);
+  });
+
+  return Array.from(sourceMap.values());
+}
+
+/**
+ * Adds `newChunks` to `existingSources`, appending a new source for every
+ * chunk whose source key is not present yet, and returns all sources with
+ * their chunks sorted by `order`. Existing sources keep their positions.
+ *
+ * The arguments are left untouched: each existing source is shallow-copied
+ * with a fresh `chunks` array before anything is pushed or sorted.
+ */
+export function mergeSourcesWithChunks(existingSources: Source[], newChunks: Chunk[]): Source[] {
+  const sourceMap = new Map<string, Source>();
+  const sourceOrder: string[] = [];
+
+  existingSources.forEach((source) => {
+    const key = getSourceKey(source.source_url, source.source_description);
+    // Copy first so the push and in-place sort below never alter caller data.
+    sourceMap.set(key, { ...source, chunks: [...source.chunks] });
+    sourceOrder.push(key);
+  });
+
+  newChunks.forEach((chunk) => {
+    const key = getChunkSourceKey(chunk);
+    const existing = sourceMap.get(key);
+    if (existing) {
+      existing.chunks.push(chunk);
+    } else {
+      sourceMap.set(key, {
+        chunks: [chunk],
+        source_url: chunk.source_url,
+        source_description: chunk.source_description,
+        source_name: chunk.source_name,
+      });
+      sourceOrder.push(key);
+    }
+  });
+
+  return sourceOrder.map((key) => sortChunksInSourceByOrder(sourceMap.get(key)!));
+}
+
+/**
+ * Sorts `source.chunks` by ascending `order` in place and returns the same
+ * source object.
+ */
+export function sortChunksInSourceByOrder(source: Source): Source {
+  source.chunks.sort((a, b) => a.order - b.order);
+  return source;
+}
+
+/** Groups chunks into sources and sorts the chunks of each source by `order`. */
+export function getSourcesFromChunks(chunks: Chunk[]): Source[] {
+  const sources = aggregateSourcesFromChunks(chunks);
+  return sources.map((source) => sortChunksInSourceByOrder(source));
+}
+
+/**
+ * Joins the chunks of one source into a single excerpt.
+ *
+ * Each chunk's text is followed by a ` [citationNumber] ` marker. A chunk's
+ * pre_context is omitted when it equals the previous chunk's post_context,
+ * and its post_context is omitted when it equals the next chunk's
+ * pre_context. Chunks are separated by a blank line and the result is trimmed.
+ */
+export function buildContextFromOrderedChunks(
+  chunks: Chunk[],
+  citationNumber: number
+): string {
+  if (chunks.length === 0) {
+    return "";
+  }
+
+  let context = "";
+
+  for (let i = 0; i < chunks.length; i++) {
+    const chunk = chunks[i];
+
+    if (i === 0 || chunk.pre_context !== chunks[i - 1].post_context) {
+      context += chunk.pre_context;
+    }
+
+    context += " " + chunk.text + ` [${citationNumber}] `;
+
+    if (
+      i === chunks.length - 1 ||
+      chunk.post_context !== chunks[i + 1].pre_context
+    ) {
+      context += chunk.post_context;
+    }
+
+    if (i < chunks.length - 1) {
+      context += "\n\n";
+    }
+  }
+
+  return context.trim();
+}
+
+/**
+ * Renders one source as a prompt section: number, name, description, the
+ * citation instruction with the source URL, and the excerpt built from its chunks.
+ */
+export function getContextFromSource(
+  source: Source,
+  citationNumber: number
+): string {
+  return `
+
+    # Source ${citationNumber}
+    ## Source Name
+    ${source.source_name}
+    ## Source Description
+    ${source.source_description}
+    ## Source Citation
+    If you use this source, cite it using a markdown link with the source number as the link text, as follows: [${citationNumber}](${source.source_url})
+    ## Excerpt from Source
+    ${buildContextFromOrderedChunks(source.chunks, citationNumber)}
+
+  `;
+}
+
+/** Renders every source with a 1-based citation number, joined by three newlines. */
+export function getContextFromSources(sources: Source[]): string {
+  return sources
+    .map((source, index) => getContextFromSource(source, index + 1))
+    .join("\n\n\n");
+}
+
+/** Builds one citation (URL and description) per source, validated by citationSchema. */
+export function getCitationsFromSources(sources: Source[]): Citation[] {
+  return sources.map((source) =>
+    citationSchema.parse({
+      source_url: source.source_url,
+      source_description: source.source_description,
+    })
+  );
+}
+
+/**
+ * Converts a search response into validated chunks.
+ *
+ * The record list is the first of these that is an array: `results` itself,
+ * `results.result.hits`, `results.records`, `results.matches`, `results.data`.
+ * Any other shape logs a warning and yields an empty list. Each record's
+ * values are read from `fields` (or `values`, or `data`) and then from
+ * `metadata`; records that fail chunkSchema validation are logged and dropped.
+ */
+export function searchResultsToChunks(results: any): Chunk[] {
+  let records: any[] = [];
+
+  if (Array.isArray(results)) {
+    records = results;
+  } else if (results?.result?.hits && Array.isArray(results.result.hits)) {
+    records = results.result.hits;
+  } else if (results?.records && Array.isArray(results.records)) {
+    records = results.records;
+  } else if (results?.matches && Array.isArray(results.matches)) {
+    records = results.matches;
+  } else if (results?.data && Array.isArray(results.data)) {
+    records = results.data;
+  } else {
+    console.warn("searchResultsToChunks - Invalid results structure:", {
+      hasResults: !!results,
+      isArray: Array.isArray(results),
+      hasResultHits: !!(results && results.result && results.result.hits),
+      hasRecords: !!(results && results.records),
+      hasMatches: !!(results && results.matches),
+      hasData: !!(results && results.data),
+      resultsKeys: results ? Object.keys(results) : [],
+      resultsType: typeof results
+    });
+    return [];
+  }
+
+  return records
+    .map((record: any) => {
+      // Optional chaining keeps a null or undefined entry from throwing here.
+      const fields = record?.fields || record?.values || record?.data || {};
+      const metadata = record?.metadata || {};
+
+      const chunkData = {
+        pre_context: fields.pre_context || metadata.pre_context || "",
+        text: fields.chunk_text || fields.text || metadata.chunk_text || metadata.text || record?.text || "",
+        post_context: fields.post_context || metadata.post_context || "",
+        chunk_type: (fields.chunk_type || metadata.chunk_type || "text") as "image" | "text",
+        source_url: fields.source_url || metadata.source_url || "",
+        source_description: fields.source_description || metadata.source_description || "",
+        source_name: fields.source_name || metadata.source_name || "",
+        order: fields.order !== undefined ? fields.order : (metadata.order !== undefined ? metadata.order : 0),
+      };
+
+      try {
+        return chunkSchema.parse(chunkData);
+      } catch (error) {
+        console.warn("searchResultsToChunks - Dropping record that failed validation:", error);
+        return null;
+      }
+    })
+    .filter((chunk: Chunk | null): chunk is Chunk => chunk !== null);
+}
+
+/** Removes every bracketed numeric marker such as "[12]" from `text` and trims the result. */
+export function stripCitationsFromText(text: string): string {
+  return text.replace(/\[\d+\]/g, "").trim();
+}
+
diff --git a/lib/utils.ts b/lib/utils.ts
new file mode 100644
index 0000000..bd0c391
--- /dev/null
+++ b/lib/utils.ts
@@ -0,0 +1,7 @@
+import { clsx, type ClassValue } from "clsx"
+import { twMerge } from "tailwind-merge"
+
+/** Builds a class string with clsx and passes it through twMerge. */
+export function cn(...inputs: ClassValue[]) {
+  return twMerge(clsx(inputs))
+}