Add files via upload

This commit is contained in:
Daniel Ringel 2026-01-03 17:14:34 +01:00 committed by GitHub
parent e9c2930a9e
commit 8ba94c1481
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 396 additions and 0 deletions

19
hooks/use-mobile.ts Normal file
View File

@ -0,0 +1,19 @@
import * as React from "react"
// Viewport width (px) at or above which the layout is treated as desktop.
const MOBILE_BREAKPOINT = 768
/**
 * React hook that reports whether the viewport is narrower than
 * MOBILE_BREAKPOINT. Returns false until the effect runs on the client
 * (state starts undefined), so SSR/first paint is treated as desktop.
 */
export function useIsMobile() {
  const [isMobile, setIsMobile] = React.useState<boolean | undefined>(undefined)

  React.useEffect(() => {
    // max-width uses BREAKPOINT - 1 so exactly 768px counts as desktop,
    // matching the `< MOBILE_BREAKPOINT` comparison below.
    const query = window.matchMedia(`(max-width: ${MOBILE_BREAKPOINT - 1}px)`)
    const update = () => {
      setIsMobile(window.innerWidth < MOBILE_BREAKPOINT)
    }
    query.addEventListener("change", update)
    update() // seed the state immediately on mount
    return () => query.removeEventListener("change", update)
  }, [])

  // Coerce the pre-mount `undefined` to false.
  return !!isMobile
}

95
lib/moderation.ts Normal file
View File

@ -0,0 +1,95 @@
import OpenAI from 'openai';
import {
MODERATION_DENIAL_MESSAGE_SEXUAL,
MODERATION_DENIAL_MESSAGE_SEXUAL_MINORS,
MODERATION_DENIAL_MESSAGE_HARASSMENT,
MODERATION_DENIAL_MESSAGE_HARASSMENT_THREATENING,
MODERATION_DENIAL_MESSAGE_HATE,
MODERATION_DENIAL_MESSAGE_HATE_THREATENING,
MODERATION_DENIAL_MESSAGE_ILLICIT,
MODERATION_DENIAL_MESSAGE_ILLICIT_VIOLENT,
MODERATION_DENIAL_MESSAGE_SELF_HARM,
MODERATION_DENIAL_MESSAGE_SELF_HARM_INTENT,
MODERATION_DENIAL_MESSAGE_SELF_HARM_INSTRUCTIONS,
MODERATION_DENIAL_MESSAGE_VIOLENCE,
MODERATION_DENIAL_MESSAGE_VIOLENCE_GRAPHIC,
MODERATION_DENIAL_MESSAGE_DEFAULT,
} from '@/config';
/** Outcome of a moderation check on user-supplied text. */
export interface ModerationResult {
  flagged: boolean; // true when the moderation endpoint flagged the text
  denialMessage?: string; // user-facing refusal text; present only when flagged
  category?: string; // the matched category key; may be absent if flagged but unrecognized
}
// Maps each OpenAI moderation category key to its configured denial message.
// Keys mirror the category names returned by the moderation endpoint.
const CATEGORY_DENIAL_MESSAGES: Record<string, string> = {
  'sexual': MODERATION_DENIAL_MESSAGE_SEXUAL,
  'sexual/minors': MODERATION_DENIAL_MESSAGE_SEXUAL_MINORS,
  'harassment': MODERATION_DENIAL_MESSAGE_HARASSMENT,
  'harassment/threatening': MODERATION_DENIAL_MESSAGE_HARASSMENT_THREATENING,
  'hate': MODERATION_DENIAL_MESSAGE_HATE,
  'hate/threatening': MODERATION_DENIAL_MESSAGE_HATE_THREATENING,
  'illicit': MODERATION_DENIAL_MESSAGE_ILLICIT,
  'illicit/violent': MODERATION_DENIAL_MESSAGE_ILLICIT_VIOLENT,
  'self-harm': MODERATION_DENIAL_MESSAGE_SELF_HARM,
  'self-harm/intent': MODERATION_DENIAL_MESSAGE_SELF_HARM_INTENT,
  'self-harm/instructions': MODERATION_DENIAL_MESSAGE_SELF_HARM_INSTRUCTIONS,
  'violence': MODERATION_DENIAL_MESSAGE_VIOLENCE,
  'violence/graphic': MODERATION_DENIAL_MESSAGE_VIOLENCE_GRAPHIC,
};
// Priority order for reporting a flagged category: more specific/severe
// variants (e.g. 'sexual/minors') are checked before their parent category,
// so the denial message reflects the worst violation present.
const CATEGORY_CHECK_ORDER: string[] = [
  'sexual/minors',
  'sexual',
  'harassment/threatening',
  'harassment',
  'hate/threatening',
  'hate',
  'illicit/violent',
  'illicit',
  'self-harm/instructions',
  'self-harm/intent',
  'self-harm',
  'violence/graphic',
  'violence',
];
/**
 * Runs `text` through OpenAI's moderation endpoint.
 *
 * Returns `{ flagged: false }` for empty/whitespace input and — deliberately —
 * on API failure (fail-open: a moderation outage should not block all
 * content). When flagged, returns the most severe matching category per
 * CATEGORY_CHECK_ORDER together with its configured denial message.
 */
export async function isContentFlagged(text: string): Promise<ModerationResult> {
  if (!text || text.trim().length === 0) {
    return { flagged: false };
  }
  const openai = new OpenAI({
    apiKey: process.env.OPENAI_API_KEY,
  });
  try {
    const moderationResult = await openai.moderations.create({
      input: text,
    });
    const result = moderationResult.results[0];
    if (!result?.flagged) {
      return { flagged: false };
    }
    const categories = result.categories;
    // Walk categories most-severe-first so the reported category/message
    // reflects the worst violation present.
    for (const category of CATEGORY_CHECK_ORDER) {
      if (categories[category as keyof typeof categories] === true) {
        return {
          flagged: true,
          category,
          denialMessage: CATEGORY_DENIAL_MESSAGES[category] || MODERATION_DENIAL_MESSAGE_DEFAULT,
        };
      }
    }
    // Flagged overall, but no known category matched (e.g. a category added
    // to the API after this list was written) — fall back to the default.
    return {
      flagged: true,
      denialMessage: MODERATION_DENIAL_MESSAGE_DEFAULT,
    };
  } catch (error) {
    // FIX: errors were previously swallowed with no trace, making moderation
    // outages invisible. Keep the fail-open contract but log with context.
    console.error('Moderation check failed; allowing content through:', error);
    return { flagged: false };
  }
}

33
lib/pinecone.ts Normal file
View File

@ -0,0 +1,33 @@
import { Pinecone } from '@pinecone-database/pinecone';
import { PINECONE_TOP_K } from '@/config';
import { searchResultsToChunks, getSourcesFromChunks, getContextFromSources } from '@/lib/sources';
import { PINECONE_INDEX_NAME } from '@/config';
// Fail fast at module load time if the Pinecone credential is missing —
// better than a confusing runtime error on first query.
if (!process.env.PINECONE_API_KEY) {
throw new Error('PINECONE_API_KEY is not set');
}
// Shared client and index handle, created once per process and reused.
export const pinecone = new Pinecone({
apiKey: process.env.PINECONE_API_KEY,
});
export const pineconeIndex = pinecone.Index(PINECONE_INDEX_NAME);
/**
 * Searches the 'default' Pinecone namespace for `query` and returns the
 * matched source excerpts formatted as an LLM-ready context string wrapped
 * in <results> delimiters.
 */
export async function searchPinecone(
  query: string,
): Promise<string> {
  const results = await pineconeIndex.namespace('default').searchRecords({
    query: {
      inputs: {
        text: query,
      },
      topK: PINECONE_TOP_K,
    },
    // NOTE(review): this requests 'source_type', but the chunk parser reads
    // 'chunk_type' and 'source_name' — verify the field list against the
    // index schema; mismatched names would leave those chunk fields empty.
    fields: ['text', 'pre_context', 'post_context', 'source_url', 'source_description', 'source_type', 'order'],
  });
  const chunks = searchResultsToChunks(results);
  const sources = getSourcesFromChunks(chunks);
  const context = getContextFromSources(sources);
  // FIX: the opening delimiter was "< results >" (stray spaces), which did
  // not match the closing "</results>" tag.
  return `<results> ${context} </results>`;
}

View File

@ -0,0 +1,37 @@
import { visit } from "unist-util-visit";
import type { Root } from "hast";
/**
 * Rehype plugin that tags every <a> element whose visible text (after
 * trimming) is exactly one character with the "single-char-link" class,
 * preserving any classes already present.
 */
export function rehypeSingleCharLink() {
  return (tree: Root) => {
    visit(tree, "element", (node) => {
      if (node.tagName !== "a") return;

      const label = extractTextContent(node).trim();
      if (label.length !== 1) return;

      node.properties = node.properties || {};
      const current = node.properties.className;

      // Normalize className to a string array, dropping non-string entries.
      const classes: string[] = [];
      if (Array.isArray(current)) {
        for (const entry of current) {
          if (typeof entry === "string") classes.push(entry);
        }
      } else if (typeof current === "string") {
        classes.push(current);
      }

      classes.push("single-char-link");
      node.properties.className = classes;
    });
  };
}
/**
 * Recursively concatenates the text of a hast node and all its descendants.
 * Non-object inputs and nodes without text/children yield "".
 *
 * FIX: parameter was `any` (type checking disabled) and `node.value || ""`
 * could leak a non-string truthy value into the string return type; now
 * typed as `unknown` with explicit narrowing.
 */
function extractTextContent(node: unknown): string {
  if (node === null || typeof node !== "object") {
    return "";
  }
  const n = node as { type?: unknown; value?: unknown; children?: unknown };
  if (n.type === "text") {
    return typeof n.value === "string" ? n.value : "";
  }
  if (Array.isArray(n.children)) {
    return n.children.map((child) => extractTextContent(child)).join("");
  }
  return "";
}

206
lib/sources.ts Normal file
View File

@ -0,0 +1,206 @@
import { Chunk, Source, Citation, chunkSchema, citationSchema } from "@/types/data";
/**
 * Builds the composite identity key for a source. "|||" is used as a
 * separator unlikely to occur in URLs or descriptions.
 */
export function getSourceKey(source_url: string, source_description: string): string {
  return [source_url, source_description].join("|||");
}
/** Convenience wrapper: derives a chunk's source key from its own fields. */
function getChunkSourceKey(chunk: Chunk): string {
  const { source_url, source_description } = chunk;
  return getSourceKey(source_url, source_description);
}
/**
 * Groups chunks by source identity (url + description) into Source objects.
 * Sources appear in first-seen order; chunk order within each source is the
 * input order (not sorted here).
 */
export function aggregateSourcesFromChunks(chunks: Chunk[]): Source[] {
  const byKey = new Map<string, Source>();
  for (const chunk of chunks) {
    const key = getChunkSourceKey(chunk);
    let source = byKey.get(key);
    if (!source) {
      source = {
        chunks: [],
        source_url: chunk.source_url,
        source_description: chunk.source_description,
        source_name: chunk.source_name,
      };
      byKey.set(key, source);
    }
    source.chunks.push(chunk);
  }
  return [...byKey.values()];
}
/**
 * Merges new chunks into an existing source list. Chunks matching an
 * existing source are appended to it (mutating that source); unmatched
 * chunks create new sources appended after the existing ones. Every
 * returned source has its chunks sorted in place by `order`.
 */
export function mergeSourcesWithChunks(existingSources: Source[], newChunks: Chunk[]): Source[] {
  const byKey = new Map<string, Source>();
  const orderedKeys: string[] = [];

  for (const source of existingSources) {
    const key = getSourceKey(source.source_url, source.source_description);
    byKey.set(key, source);
    orderedKeys.push(key);
  }

  for (const chunk of newChunks) {
    const key = getChunkSourceKey(chunk);
    const existing = byKey.get(key);
    if (existing) {
      existing.chunks.push(chunk);
    } else {
      byKey.set(key, {
        chunks: [chunk],
        source_url: chunk.source_url,
        source_description: chunk.source_description,
        source_name: chunk.source_name,
      });
      orderedKeys.push(key);
    }
  }

  return orderedKeys.map((key) => sortChunksInSourceByOrder(byKey.get(key)!));
}
/**
 * Sorts a source's chunks ascending by `order`, IN PLACE, and returns the
 * same source object for convenient chaining.
 */
export function sortChunksInSourceByOrder(source: Source): Source {
  source.chunks.sort((first, second) => first.order - second.order);
  return source;
}
/** Groups chunks into sources and order-sorts each source's chunks. */
export function getSourcesFromChunks(chunks: Chunk[]): Source[] {
  return aggregateSourcesFromChunks(chunks).map((source) =>
    sortChunksInSourceByOrder(source)
  );
}
/**
 * Stitches an ordered list of chunks into one context string, appending
 * " [N] " after each chunk's text as a citation marker. When adjacent
 * chunks share surrounding context (this chunk's pre_context equals the
 * previous chunk's post_context), the shared text is suppressed rather
 * than duplicated. Chunks are separated by a blank line; the whole result
 * is trimmed. Returns "" for an empty chunk list.
 *
 * NOTE(review): when contexts match, the shared text is omitted from BOTH
 * sides (appears zero times, not once) — presumably intentional dedup;
 * confirm against the prompt format consumers expect.
 */
export function buildContextFromOrderedChunks(
  chunks: Chunk[],
  citationNumber: number
): string {
  if (chunks.length === 0) {
    return "";
  }

  const parts: string[] = [];
  chunks.forEach((chunk, index) => {
    const previous = index > 0 ? chunks[index - 1] : undefined;
    const next = index < chunks.length - 1 ? chunks[index + 1] : undefined;

    if (!previous || chunk.pre_context !== previous.post_context) {
      parts.push(chunk.pre_context);
    }
    parts.push(" " + chunk.text + ` [${citationNumber}] `);
    if (!next || chunk.post_context !== next.pre_context) {
      parts.push(chunk.post_context);
    }
    if (next) {
      parts.push("\n\n");
    }
  });

  return parts.join("").trim();
}
/**
 * Formats one source — name, description, citation instructions, and its
 * stitched chunk excerpts — as an <excerpt-from-source> block for the LLM
 * prompt. `citationNumber` is the 1-based index the model should cite.
 */
export function getContextFromSource(
source: Source,
citationNumber: number
): string {
// NOTE: the template below is prompt content; its exact wording and line
// breaks are part of the contract with the model — do not reformat.
return `
<excerpt-from-source>
# Source ${citationNumber}
## Source Name
${source.source_name}
## Source Description
${source.source_description}
## Source Citation
If you use this source, cite it using a markdown link with the source number as the link text, as follows: [${citationNumber}](${source.source_url})
## Excerpt from Source
${buildContextFromOrderedChunks(source.chunks, citationNumber)}
</excerpt-from-source>
`;
}
/**
 * Renders every source as an excerpt block (citation numbers start at 1)
 * and joins them with blank-line separators.
 */
export function getContextFromSources(sources: Source[]): string {
  const sections: string[] = [];
  sources.forEach((source, index) => {
    sections.push(getContextFromSource(source, index + 1));
  });
  return sections.join("\n\n\n");
}
/**
 * Projects each source down to its citation fields, validating the shape
 * through `citationSchema` (throws if a source violates the schema).
 */
export function getCitationsFromSources(sources: Source[]): Citation[] {
  const citations: Citation[] = [];
  for (const { source_url, source_description } of sources) {
    citations.push(citationSchema.parse({ source_url, source_description }));
  }
  return citations;
}
/**
 * Converts a Pinecone search response into validated Chunk objects.
 *
 * Accepts the several response shapes different Pinecone client versions
 * return (bare array, `result.hits`, `records`, `matches`, `data`); logs a
 * diagnostic and returns [] for anything else. Per-record field lookup
 * tries `fields`, then `metadata`, then the record itself. Records that
 * fail `chunkSchema` validation are silently dropped (best-effort).
 *
 * FIX: removed the dead `classNo` computation — it was parsed from
 * `class_no` but never written into the chunk, so it had no effect.
 */
export function searchResultsToChunks(results: any): Chunk[] {
  let records: any[] = [];
  if (Array.isArray(results)) {
    records = results;
  } else if (results?.result?.hits && Array.isArray(results.result.hits)) {
    records = results.result.hits;
  } else if (results?.records && Array.isArray(results.records)) {
    records = results.records;
  } else if (results?.matches && Array.isArray(results.matches)) {
    records = results.matches;
  } else if (results?.data && Array.isArray(results.data)) {
    records = results.data;
  } else {
    console.warn("searchResultsToChunks - Invalid results structure:", {
      hasResults: !!results,
      isArray: Array.isArray(results),
      hasResultHits: !!(results && results.result && results.result.hits),
      hasRecords: !!(results && results.records),
      hasMatches: !!(results && results.matches),
      hasData: !!(results && results.data),
      resultsKeys: results ? Object.keys(results) : [],
      resultsType: typeof results
    });
    return [];
  }
  return records
    .map((record: any) => {
      const fields = record.fields || record.values || record.data || {};
      const metadata = record.metadata || {};
      const chunkData = {
        pre_context: fields.pre_context || metadata.pre_context || "",
        text: fields.chunk_text || fields.text || metadata.chunk_text || metadata.text || record.text || "",
        post_context: fields.post_context || metadata.post_context || "",
        chunk_type: (fields.chunk_type || metadata.chunk_type || "text") as "image" | "text",
        source_url: fields.source_url || metadata.source_url || "",
        source_description: fields.source_description || metadata.source_description || "",
        source_name: fields.source_name || metadata.source_name || "",
        order: fields.order !== undefined ? fields.order : (metadata.order !== undefined ? metadata.order : 0),
      };
      try {
        return chunkSchema.parse(chunkData);
      } catch {
        // Drop records that don't satisfy the chunk schema.
        return null;
      }
    })
    .filter((chunk: Chunk | null): chunk is Chunk => chunk !== null);
}
/** Removes every "[N]" citation marker from `text` and trims the result. */
export function stripCitationsFromText(text: string): string {
  const withoutCitations = text.replace(/\[\d+\]/g, "");
  return withoutCitations.trim();
}

6
lib/utils.ts Normal file
View File

@ -0,0 +1,6 @@
import { clsx, type ClassValue } from "clsx"
import { twMerge } from "tailwind-merge"
/**
 * Combines class values: clsx resolves conditionals/arrays into one class
 * string, then tailwind-merge drops conflicting Tailwind utilities.
 */
export function cn(...inputs: ClassValue[]) {
  const resolved = clsx(inputs)
  return twMerge(resolved)
}