Add files via upload
This commit is contained in:
parent
e9c2930a9e
commit
8ba94c1481
19
hooks/use-mobile.ts
Normal file
19
hooks/use-mobile.ts
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
import * as React from "react"
|
||||||
|
|
||||||
|
const MOBILE_BREAKPOINT = 768
|
||||||
|
|
||||||
|
export function useIsMobile() {
|
||||||
|
const [isMobile, setIsMobile] = React.useState<boolean | undefined>(undefined)
|
||||||
|
|
||||||
|
React.useEffect(() => {
|
||||||
|
const mql = window.matchMedia(`(max-width: ${MOBILE_BREAKPOINT - 1}px)`)
|
||||||
|
const onChange = () => {
|
||||||
|
setIsMobile(window.innerWidth < MOBILE_BREAKPOINT)
|
||||||
|
}
|
||||||
|
mql.addEventListener("change", onChange)
|
||||||
|
setIsMobile(window.innerWidth < MOBILE_BREAKPOINT)
|
||||||
|
return () => mql.removeEventListener("change", onChange)
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
return !!isMobile
|
||||||
|
}
|
||||||
95
lib/moderation.ts
Normal file
95
lib/moderation.ts
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
import OpenAI from 'openai';
|
||||||
|
import {
|
||||||
|
MODERATION_DENIAL_MESSAGE_SEXUAL,
|
||||||
|
MODERATION_DENIAL_MESSAGE_SEXUAL_MINORS,
|
||||||
|
MODERATION_DENIAL_MESSAGE_HARASSMENT,
|
||||||
|
MODERATION_DENIAL_MESSAGE_HARASSMENT_THREATENING,
|
||||||
|
MODERATION_DENIAL_MESSAGE_HATE,
|
||||||
|
MODERATION_DENIAL_MESSAGE_HATE_THREATENING,
|
||||||
|
MODERATION_DENIAL_MESSAGE_ILLICIT,
|
||||||
|
MODERATION_DENIAL_MESSAGE_ILLICIT_VIOLENT,
|
||||||
|
MODERATION_DENIAL_MESSAGE_SELF_HARM,
|
||||||
|
MODERATION_DENIAL_MESSAGE_SELF_HARM_INTENT,
|
||||||
|
MODERATION_DENIAL_MESSAGE_SELF_HARM_INSTRUCTIONS,
|
||||||
|
MODERATION_DENIAL_MESSAGE_VIOLENCE,
|
||||||
|
MODERATION_DENIAL_MESSAGE_VIOLENCE_GRAPHIC,
|
||||||
|
MODERATION_DENIAL_MESSAGE_DEFAULT,
|
||||||
|
} from '@/config';
|
||||||
|
|
||||||
|
/**
 * Outcome of a moderation check.
 *
 * - `flagged` — whether the text violated any moderation category.
 * - `denialMessage` — user-facing message to show when flagged.
 * - `category` — the moderation category that triggered the flag, when known.
 */
export interface ModerationResult {
  flagged: boolean;
  denialMessage?: string;
  category?: string;
}
|
||||||
|
|
||||||
|
// Maps each OpenAI moderation category name to the user-facing denial
// message configured for it. Categories missing from this map fall back to
// MODERATION_DENIAL_MESSAGE_DEFAULT at the call site.
const CATEGORY_DENIAL_MESSAGES: Record<string, string> = {
  'sexual': MODERATION_DENIAL_MESSAGE_SEXUAL,
  'sexual/minors': MODERATION_DENIAL_MESSAGE_SEXUAL_MINORS,
  'harassment': MODERATION_DENIAL_MESSAGE_HARASSMENT,
  'harassment/threatening': MODERATION_DENIAL_MESSAGE_HARASSMENT_THREATENING,
  'hate': MODERATION_DENIAL_MESSAGE_HATE,
  'hate/threatening': MODERATION_DENIAL_MESSAGE_HATE_THREATENING,
  'illicit': MODERATION_DENIAL_MESSAGE_ILLICIT,
  'illicit/violent': MODERATION_DENIAL_MESSAGE_ILLICIT_VIOLENT,
  'self-harm': MODERATION_DENIAL_MESSAGE_SELF_HARM,
  'self-harm/intent': MODERATION_DENIAL_MESSAGE_SELF_HARM_INTENT,
  'self-harm/instructions': MODERATION_DENIAL_MESSAGE_SELF_HARM_INSTRUCTIONS,
  'violence': MODERATION_DENIAL_MESSAGE_VIOLENCE,
  'violence/graphic': MODERATION_DENIAL_MESSAGE_VIOLENCE_GRAPHIC,
};
|
||||||
|
|
||||||
|
// Priority order in which flagged categories are reported: within each
// family the more specific/severe variant (e.g. 'sexual/minors',
// 'hate/threatening') is listed before the general one, so the denial
// message reflects the most serious violation when several categories flag.
const CATEGORY_CHECK_ORDER: string[] = [
  'sexual/minors',
  'sexual',
  'harassment/threatening',
  'harassment',
  'hate/threatening',
  'hate',
  'illicit/violent',
  'illicit',
  'self-harm/instructions',
  'self-harm/intent',
  'self-harm',
  'violence/graphic',
  'violence',
];
|
||||||
|
|
||||||
|
export async function isContentFlagged(text: string): Promise<ModerationResult> {
|
||||||
|
if (!text || text.trim().length === 0) {
|
||||||
|
return { flagged: false };
|
||||||
|
}
|
||||||
|
|
||||||
|
const openai = new OpenAI({
|
||||||
|
apiKey: process.env.OPENAI_API_KEY,
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const moderationResult = await openai.moderations.create({
|
||||||
|
input: text,
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = moderationResult.results[0];
|
||||||
|
if (!result?.flagged) {
|
||||||
|
return { flagged: false };
|
||||||
|
}
|
||||||
|
|
||||||
|
const categories = result.categories;
|
||||||
|
for (const category of CATEGORY_CHECK_ORDER) {
|
||||||
|
if (categories[category as keyof typeof categories] === true) {
|
||||||
|
return {
|
||||||
|
flagged: true,
|
||||||
|
category,
|
||||||
|
denialMessage: CATEGORY_DENIAL_MESSAGES[category] || MODERATION_DENIAL_MESSAGE_DEFAULT,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
flagged: true,
|
||||||
|
denialMessage: MODERATION_DENIAL_MESSAGE_DEFAULT,
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
return { flagged: false };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
33
lib/pinecone.ts
Normal file
33
lib/pinecone.ts
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
import { Pinecone } from '@pinecone-database/pinecone';
|
||||||
|
import { PINECONE_TOP_K } from '@/config';
|
||||||
|
import { searchResultsToChunks, getSourcesFromChunks, getContextFromSources } from '@/lib/sources';
|
||||||
|
import { PINECONE_INDEX_NAME } from '@/config';
|
||||||
|
|
||||||
|
if (!process.env.PINECONE_API_KEY) {
|
||||||
|
throw new Error('PINECONE_API_KEY is not set');
|
||||||
|
}
|
||||||
|
|
||||||
|
export const pinecone = new Pinecone({
|
||||||
|
apiKey: process.env.PINECONE_API_KEY,
|
||||||
|
});
|
||||||
|
|
||||||
|
export const pineconeIndex = pinecone.Index(PINECONE_INDEX_NAME);
|
||||||
|
|
||||||
|
export async function searchPinecone(
|
||||||
|
query: string,
|
||||||
|
): Promise<string> {
|
||||||
|
const results = await pineconeIndex.namespace('default').searchRecords({
|
||||||
|
query: {
|
||||||
|
inputs: {
|
||||||
|
text: query,
|
||||||
|
},
|
||||||
|
topK: PINECONE_TOP_K,
|
||||||
|
},
|
||||||
|
fields: ['text', 'pre_context', 'post_context', 'source_url', 'source_description', 'source_type', 'order'],
|
||||||
|
});
|
||||||
|
|
||||||
|
const chunks = searchResultsToChunks(results);
|
||||||
|
const sources = getSourcesFromChunks(chunks);
|
||||||
|
const context = getContextFromSources(sources);
|
||||||
|
return `< results > ${context} </results>`;
|
||||||
|
}
|
||||||
37
lib/rehype-single-char-link.ts
Normal file
37
lib/rehype-single-char-link.ts
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
import { visit } from "unist-util-visit";
|
||||||
|
import type { Root } from "hast";
|
||||||
|
|
||||||
|
export function rehypeSingleCharLink() {
|
||||||
|
return (tree: Root) => {
|
||||||
|
visit(tree, "element", (node) => {
|
||||||
|
if (node.tagName === "a") {
|
||||||
|
const textContent = extractTextContent(node);
|
||||||
|
const trimmedText = textContent.trim();
|
||||||
|
if (trimmedText.length === 1) {
|
||||||
|
node.properties = node.properties || {};
|
||||||
|
const existingClass = Array.isArray(node.properties.className)
|
||||||
|
? node.properties.className.filter((c): c is string => typeof c === "string")
|
||||||
|
: typeof node.properties.className === "string"
|
||||||
|
? [node.properties.className]
|
||||||
|
: [];
|
||||||
|
|
||||||
|
node.properties.className = [...existingClass, "single-char-link"];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function extractTextContent(node: any): string {
|
||||||
|
if (node.type === "text") {
|
||||||
|
return node.value || "";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node.children && Array.isArray(node.children)) {
|
||||||
|
return node.children
|
||||||
|
.map((child: any) => extractTextContent(child))
|
||||||
|
.join("");
|
||||||
|
}
|
||||||
|
|
||||||
|
return "";
|
||||||
|
}
|
||||||
206
lib/sources.ts
Normal file
206
lib/sources.ts
Normal file
@ -0,0 +1,206 @@
|
|||||||
|
import { Chunk, Source, Citation, chunkSchema, citationSchema } from "@/types/data";
|
||||||
|
|
||||||
|
export function getSourceKey(source_url: string, source_description: string): string {
|
||||||
|
return `${source_url}|||${source_description}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
function getChunkSourceKey(chunk: Chunk): string {
|
||||||
|
return getSourceKey(chunk.source_url, chunk.source_description);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function aggregateSourcesFromChunks(chunks: Chunk[]): Source[] {
|
||||||
|
const sourceMap = new Map<string, Source>();
|
||||||
|
|
||||||
|
chunks.forEach((chunk) => {
|
||||||
|
const key = getChunkSourceKey(chunk);
|
||||||
|
if (!sourceMap.has(key)) {
|
||||||
|
sourceMap.set(key, {
|
||||||
|
chunks: [],
|
||||||
|
source_url: chunk.source_url,
|
||||||
|
source_description: chunk.source_description,
|
||||||
|
source_name: chunk.source_name,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
sourceMap.get(key)!.chunks.push(chunk);
|
||||||
|
});
|
||||||
|
|
||||||
|
return Array.from(sourceMap.values());
|
||||||
|
}
|
||||||
|
|
||||||
|
export function mergeSourcesWithChunks(existingSources: Source[], newChunks: Chunk[]): Source[] {
|
||||||
|
const sourceMap = new Map<string, Source>();
|
||||||
|
const sourceOrder: string[] = [];
|
||||||
|
|
||||||
|
existingSources.forEach((source) => {
|
||||||
|
const key = getSourceKey(source.source_url, source.source_description);
|
||||||
|
sourceMap.set(key, source);
|
||||||
|
sourceOrder.push(key);
|
||||||
|
});
|
||||||
|
|
||||||
|
newChunks.forEach((chunk) => {
|
||||||
|
const key = getChunkSourceKey(chunk);
|
||||||
|
if (sourceMap.has(key)) {
|
||||||
|
sourceMap.get(key)!.chunks.push(chunk);
|
||||||
|
} else {
|
||||||
|
const newSource: Source = {
|
||||||
|
chunks: [chunk],
|
||||||
|
source_url: chunk.source_url,
|
||||||
|
source_description: chunk.source_description,
|
||||||
|
source_name: chunk.source_name,
|
||||||
|
};
|
||||||
|
sourceMap.set(key, newSource);
|
||||||
|
sourceOrder.push(key);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return sourceOrder.map((key) => {
|
||||||
|
const source = sourceMap.get(key)!;
|
||||||
|
return sortChunksInSourceByOrder(source);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export function sortChunksInSourceByOrder(source: Source): Source {
|
||||||
|
source.chunks.sort((a, b) => a.order - b.order);
|
||||||
|
return source;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function getSourcesFromChunks(chunks: Chunk[]): Source[] {
|
||||||
|
const sources = aggregateSourcesFromChunks(chunks);
|
||||||
|
return sources.map((source) => sortChunksInSourceByOrder(source));
|
||||||
|
}
|
||||||
|
|
||||||
|
export function buildContextFromOrderedChunks(
|
||||||
|
chunks: Chunk[],
|
||||||
|
citationNumber: number
|
||||||
|
): string {
|
||||||
|
if (chunks.length === 0) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
let context = "";
|
||||||
|
|
||||||
|
for (let i = 0; i < chunks.length; i++) {
|
||||||
|
const chunk = chunks[i];
|
||||||
|
|
||||||
|
if (i === 0 || chunk.pre_context !== chunks[i - 1].post_context) {
|
||||||
|
context += chunk.pre_context;
|
||||||
|
}
|
||||||
|
|
||||||
|
context += " " + chunk.text + ` [${citationNumber}] `;
|
||||||
|
|
||||||
|
if (
|
||||||
|
i === chunks.length - 1 ||
|
||||||
|
chunk.post_context !== chunks[i + 1].pre_context
|
||||||
|
) {
|
||||||
|
context += chunk.post_context;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i < chunks.length - 1) {
|
||||||
|
context += "\n\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return context.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
export function getContextFromSource(
|
||||||
|
source: Source,
|
||||||
|
citationNumber: number
|
||||||
|
): string {
|
||||||
|
return `
|
||||||
|
<excerpt-from-source>
|
||||||
|
# Source ${citationNumber}
|
||||||
|
## Source Name
|
||||||
|
${source.source_name}
|
||||||
|
## Source Description
|
||||||
|
${source.source_description}
|
||||||
|
## Source Citation
|
||||||
|
If you use this source, cite it using a markdown link with the source number as the link text, as follows: [${citationNumber}](${source.source_url})
|
||||||
|
## Excerpt from Source
|
||||||
|
${buildContextFromOrderedChunks(source.chunks, citationNumber)}
|
||||||
|
</excerpt-from-source>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function getContextFromSources(sources: Source[]): string {
|
||||||
|
return sources
|
||||||
|
.map((source, index) => getContextFromSource(source, index + 1))
|
||||||
|
.join("\n\n\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
export function getCitationsFromSources(sources: Source[]): Citation[] {
|
||||||
|
return sources.map((source) =>
|
||||||
|
citationSchema.parse({
|
||||||
|
source_url: source.source_url,
|
||||||
|
source_description: source.source_description,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function searchResultsToChunks(results: any): Chunk[] {
|
||||||
|
let records: any[] = [];
|
||||||
|
|
||||||
|
if (Array.isArray(results)) {
|
||||||
|
records = results;
|
||||||
|
} else if (results?.result?.hits && Array.isArray(results.result.hits)) {
|
||||||
|
records = results.result.hits;
|
||||||
|
} else if (results?.records && Array.isArray(results.records)) {
|
||||||
|
records = results.records;
|
||||||
|
} else if (results?.matches && Array.isArray(results.matches)) {
|
||||||
|
records = results.matches;
|
||||||
|
} else if (results?.data && Array.isArray(results.data)) {
|
||||||
|
records = results.data;
|
||||||
|
} else {
|
||||||
|
console.warn("searchResultsToChunks - Invalid results structure:", {
|
||||||
|
hasResults: !!results,
|
||||||
|
isArray: Array.isArray(results),
|
||||||
|
hasResultHits: !!(results && results.result && results.result.hits),
|
||||||
|
hasRecords: !!(results && results.records),
|
||||||
|
hasMatches: !!(results && results.matches),
|
||||||
|
hasData: !!(results && results.data),
|
||||||
|
resultsKeys: results ? Object.keys(results) : [],
|
||||||
|
resultsType: typeof results
|
||||||
|
});
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
return records
|
||||||
|
.map((record: any, index: number) => {
|
||||||
|
const fields = record.fields || record.values || record.data || {};
|
||||||
|
const metadata = record.metadata || {};
|
||||||
|
|
||||||
|
let classNo: number | undefined = undefined;
|
||||||
|
const classNoValue = fields.class_no !== undefined ? fields.class_no : (metadata.class_no !== undefined ? metadata.class_no : undefined);
|
||||||
|
if (classNoValue !== undefined && classNoValue !== null && classNoValue !== "") {
|
||||||
|
const parsed = typeof classNoValue === 'string' ? parseInt(classNoValue, 10) : classNoValue;
|
||||||
|
if (!isNaN(parsed)) {
|
||||||
|
classNo = parsed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const chunkData = {
|
||||||
|
pre_context: fields.pre_context || metadata.pre_context || "",
|
||||||
|
text: fields.chunk_text || fields.text || metadata.chunk_text || metadata.text || record.text || "",
|
||||||
|
post_context: fields.post_context || metadata.post_context || "",
|
||||||
|
chunk_type: (fields.chunk_type || metadata.chunk_type || "text") as "image" | "text",
|
||||||
|
source_url: fields.source_url || metadata.source_url || "",
|
||||||
|
source_description: fields.source_description || metadata.source_description || "",
|
||||||
|
source_name: fields.source_name || metadata.source_name || "",
|
||||||
|
order: fields.order !== undefined ? fields.order : (metadata.order !== undefined ? metadata.order : 0),
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
const parsed = chunkSchema.parse(chunkData);
|
||||||
|
return parsed;
|
||||||
|
} catch (error) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.filter((chunk: Chunk | null): chunk is Chunk => chunk !== null);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function stripCitationsFromText(text: string): string {
|
||||||
|
return text.replace(/\[\d+\]/g, "").trim();
|
||||||
|
}
|
||||||
|
|
||||||
6
lib/utils.ts
Normal file
6
lib/utils.ts
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
import { clsx, type ClassValue } from "clsx"
|
||||||
|
import { twMerge } from "tailwind-merge"
|
||||||
|
|
||||||
|
export function cn(...inputs: ClassValue[]) {
|
||||||
|
return twMerge(clsx(inputs))
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user