Add files via upload
This commit is contained in:
parent
e9c2930a9e
commit
8ba94c1481
19
hooks/use-mobile.ts
Normal file
19
hooks/use-mobile.ts
Normal file
@ -0,0 +1,19 @@
|
||||
import * as React from "react"
|
||||
|
||||
const MOBILE_BREAKPOINT = 768
|
||||
|
||||
export function useIsMobile() {
|
||||
const [isMobile, setIsMobile] = React.useState<boolean | undefined>(undefined)
|
||||
|
||||
React.useEffect(() => {
|
||||
const mql = window.matchMedia(`(max-width: ${MOBILE_BREAKPOINT - 1}px)`)
|
||||
const onChange = () => {
|
||||
setIsMobile(window.innerWidth < MOBILE_BREAKPOINT)
|
||||
}
|
||||
mql.addEventListener("change", onChange)
|
||||
setIsMobile(window.innerWidth < MOBILE_BREAKPOINT)
|
||||
return () => mql.removeEventListener("change", onChange)
|
||||
}, [])
|
||||
|
||||
return !!isMobile
|
||||
}
|
||||
95
lib/moderation.ts
Normal file
95
lib/moderation.ts
Normal file
@ -0,0 +1,95 @@
|
||||
import OpenAI from 'openai';
|
||||
import {
|
||||
MODERATION_DENIAL_MESSAGE_SEXUAL,
|
||||
MODERATION_DENIAL_MESSAGE_SEXUAL_MINORS,
|
||||
MODERATION_DENIAL_MESSAGE_HARASSMENT,
|
||||
MODERATION_DENIAL_MESSAGE_HARASSMENT_THREATENING,
|
||||
MODERATION_DENIAL_MESSAGE_HATE,
|
||||
MODERATION_DENIAL_MESSAGE_HATE_THREATENING,
|
||||
MODERATION_DENIAL_MESSAGE_ILLICIT,
|
||||
MODERATION_DENIAL_MESSAGE_ILLICIT_VIOLENT,
|
||||
MODERATION_DENIAL_MESSAGE_SELF_HARM,
|
||||
MODERATION_DENIAL_MESSAGE_SELF_HARM_INTENT,
|
||||
MODERATION_DENIAL_MESSAGE_SELF_HARM_INSTRUCTIONS,
|
||||
MODERATION_DENIAL_MESSAGE_VIOLENCE,
|
||||
MODERATION_DENIAL_MESSAGE_VIOLENCE_GRAPHIC,
|
||||
MODERATION_DENIAL_MESSAGE_DEFAULT,
|
||||
} from '@/config';
|
||||
|
||||
/**
 * Outcome of a moderation check on a piece of text.
 */
export interface ModerationResult {
  // True when the moderation endpoint flagged the text.
  flagged: boolean;
  // User-facing refusal message; present only when flagged.
  denialMessage?: string;
  // Moderation category that triggered the flag (e.g. "sexual/minors");
  // absent when not flagged or when no known category matched.
  category?: string;
}
|
||||
|
||||
// Maps each OpenAI moderation category to its user-facing denial message
// (messages come from '@/config'). Categories not present here fall back to
// MODERATION_DENIAL_MESSAGE_DEFAULT in isContentFlagged.
const CATEGORY_DENIAL_MESSAGES: Record<string, string> = {
  'sexual': MODERATION_DENIAL_MESSAGE_SEXUAL,
  'sexual/minors': MODERATION_DENIAL_MESSAGE_SEXUAL_MINORS,
  'harassment': MODERATION_DENIAL_MESSAGE_HARASSMENT,
  'harassment/threatening': MODERATION_DENIAL_MESSAGE_HARASSMENT_THREATENING,
  'hate': MODERATION_DENIAL_MESSAGE_HATE,
  'hate/threatening': MODERATION_DENIAL_MESSAGE_HATE_THREATENING,
  'illicit': MODERATION_DENIAL_MESSAGE_ILLICIT,
  'illicit/violent': MODERATION_DENIAL_MESSAGE_ILLICIT_VIOLENT,
  'self-harm': MODERATION_DENIAL_MESSAGE_SELF_HARM,
  'self-harm/intent': MODERATION_DENIAL_MESSAGE_SELF_HARM_INTENT,
  'self-harm/instructions': MODERATION_DENIAL_MESSAGE_SELF_HARM_INSTRUCTIONS,
  'violence': MODERATION_DENIAL_MESSAGE_VIOLENCE,
  'violence/graphic': MODERATION_DENIAL_MESSAGE_VIOLENCE_GRAPHIC,
};
|
||||
|
||||
// Priority order for reporting a flagged category: the more specific variant
// of each pair (e.g. "sexual/minors", "harassment/threatening") is listed
// before its general counterpart so that, when several categories are
// flagged at once, the most specific denial message is the one returned.
const CATEGORY_CHECK_ORDER: string[] = [
  'sexual/minors',
  'sexual',
  'harassment/threatening',
  'harassment',
  'hate/threatening',
  'hate',
  'illicit/violent',
  'illicit',
  'self-harm/instructions',
  'self-harm/intent',
  'self-harm',
  'violence/graphic',
  'violence',
];
|
||||
|
||||
export async function isContentFlagged(text: string): Promise<ModerationResult> {
|
||||
if (!text || text.trim().length === 0) {
|
||||
return { flagged: false };
|
||||
}
|
||||
|
||||
const openai = new OpenAI({
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
});
|
||||
|
||||
try {
|
||||
const moderationResult = await openai.moderations.create({
|
||||
input: text,
|
||||
});
|
||||
|
||||
const result = moderationResult.results[0];
|
||||
if (!result?.flagged) {
|
||||
return { flagged: false };
|
||||
}
|
||||
|
||||
const categories = result.categories;
|
||||
for (const category of CATEGORY_CHECK_ORDER) {
|
||||
if (categories[category as keyof typeof categories] === true) {
|
||||
return {
|
||||
flagged: true,
|
||||
category,
|
||||
denialMessage: CATEGORY_DENIAL_MESSAGES[category] || MODERATION_DENIAL_MESSAGE_DEFAULT,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
flagged: true,
|
||||
denialMessage: MODERATION_DENIAL_MESSAGE_DEFAULT,
|
||||
};
|
||||
} catch (error) {
|
||||
return { flagged: false };
|
||||
}
|
||||
}
|
||||
|
||||
33
lib/pinecone.ts
Normal file
33
lib/pinecone.ts
Normal file
@ -0,0 +1,33 @@
|
||||
import { Pinecone } from '@pinecone-database/pinecone';
|
||||
import { PINECONE_TOP_K } from '@/config';
|
||||
import { searchResultsToChunks, getSourcesFromChunks, getContextFromSources } from '@/lib/sources';
|
||||
import { PINECONE_INDEX_NAME } from '@/config';
|
||||
|
||||
// Fail fast at module load when credentials are missing, so a misconfigured
// deployment surfaces immediately instead of on the first query.
if (!process.env.PINECONE_API_KEY) {
  throw new Error('PINECONE_API_KEY is not set');
}

// Shared Pinecone client for the app.
export const pinecone = new Pinecone({
  apiKey: process.env.PINECONE_API_KEY,
});

// Handle to the index named in '@/config'.
export const pineconeIndex = pinecone.Index(PINECONE_INDEX_NAME);
|
||||
|
||||
export async function searchPinecone(
|
||||
query: string,
|
||||
): Promise<string> {
|
||||
const results = await pineconeIndex.namespace('default').searchRecords({
|
||||
query: {
|
||||
inputs: {
|
||||
text: query,
|
||||
},
|
||||
topK: PINECONE_TOP_K,
|
||||
},
|
||||
fields: ['text', 'pre_context', 'post_context', 'source_url', 'source_description', 'source_type', 'order'],
|
||||
});
|
||||
|
||||
const chunks = searchResultsToChunks(results);
|
||||
const sources = getSourcesFromChunks(chunks);
|
||||
const context = getContextFromSources(sources);
|
||||
return `< results > ${context} </results>`;
|
||||
}
|
||||
37
lib/rehype-single-char-link.ts
Normal file
37
lib/rehype-single-char-link.ts
Normal file
@ -0,0 +1,37 @@
|
||||
import { visit } from "unist-util-visit";
|
||||
import type { Root } from "hast";
|
||||
|
||||
export function rehypeSingleCharLink() {
|
||||
return (tree: Root) => {
|
||||
visit(tree, "element", (node) => {
|
||||
if (node.tagName === "a") {
|
||||
const textContent = extractTextContent(node);
|
||||
const trimmedText = textContent.trim();
|
||||
if (trimmedText.length === 1) {
|
||||
node.properties = node.properties || {};
|
||||
const existingClass = Array.isArray(node.properties.className)
|
||||
? node.properties.className.filter((c): c is string => typeof c === "string")
|
||||
: typeof node.properties.className === "string"
|
||||
? [node.properties.className]
|
||||
: [];
|
||||
|
||||
node.properties.className = [...existingClass, "single-char-link"];
|
||||
}
|
||||
}
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
function extractTextContent(node: any): string {
|
||||
if (node.type === "text") {
|
||||
return node.value || "";
|
||||
}
|
||||
|
||||
if (node.children && Array.isArray(node.children)) {
|
||||
return node.children
|
||||
.map((child: any) => extractTextContent(child))
|
||||
.join("");
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
206
lib/sources.ts
Normal file
206
lib/sources.ts
Normal file
@ -0,0 +1,206 @@
|
||||
import { Chunk, Source, Citation, chunkSchema, citationSchema } from "@/types/data";
|
||||
|
||||
export function getSourceKey(source_url: string, source_description: string): string {
|
||||
return `${source_url}|||${source_description}`;
|
||||
}
|
||||
|
||||
function getChunkSourceKey(chunk: Chunk): string {
|
||||
return getSourceKey(chunk.source_url, chunk.source_description);
|
||||
}
|
||||
|
||||
export function aggregateSourcesFromChunks(chunks: Chunk[]): Source[] {
|
||||
const sourceMap = new Map<string, Source>();
|
||||
|
||||
chunks.forEach((chunk) => {
|
||||
const key = getChunkSourceKey(chunk);
|
||||
if (!sourceMap.has(key)) {
|
||||
sourceMap.set(key, {
|
||||
chunks: [],
|
||||
source_url: chunk.source_url,
|
||||
source_description: chunk.source_description,
|
||||
source_name: chunk.source_name,
|
||||
});
|
||||
}
|
||||
|
||||
sourceMap.get(key)!.chunks.push(chunk);
|
||||
});
|
||||
|
||||
return Array.from(sourceMap.values());
|
||||
}
|
||||
|
||||
/**
 * Merges `newChunks` into `existingSources`: a chunk whose source key matches
 * an existing source is appended to that source's chunks; otherwise a new
 * source is created. Existing sources keep their relative order; new sources
 * follow in first-seen order. Every returned source has its chunks sorted by
 * `order`.
 *
 * NOTE(review): this mutates its inputs — matching sources in
 * `existingSources` get chunks pushed into their `chunks` arrays, and
 * sortChunksInSourceByOrder sorts those arrays in place. Confirm no caller
 * relies on the pre-merge contents of `existingSources`.
 */
export function mergeSourcesWithChunks(existingSources: Source[], newChunks: Chunk[]): Source[] {
  const sourceMap = new Map<string, Source>();
  // Insertion order of keys, so the output ordering is deterministic.
  const sourceOrder: string[] = [];

  // Index the existing sources by key, remembering their original order.
  existingSources.forEach((source) => {
    const key = getSourceKey(source.source_url, source.source_description);
    sourceMap.set(key, source);
    sourceOrder.push(key);
  });

  // Fold each new chunk into its source, creating sources as needed.
  newChunks.forEach((chunk) => {
    const key = getChunkSourceKey(chunk);
    if (sourceMap.has(key)) {
      sourceMap.get(key)!.chunks.push(chunk);
    } else {
      const newSource: Source = {
        chunks: [chunk],
        source_url: chunk.source_url,
        source_description: chunk.source_description,
        source_name: chunk.source_name,
      };
      sourceMap.set(key, newSource);
      sourceOrder.push(key);
    }
  });

  // Emit in recorded key order, sorting each source's chunks in place.
  return sourceOrder.map((key) => {
    const source = sourceMap.get(key)!;
    return sortChunksInSourceByOrder(source);
  });
}
|
||||
|
||||
export function sortChunksInSourceByOrder(source: Source): Source {
|
||||
source.chunks.sort((a, b) => a.order - b.order);
|
||||
return source;
|
||||
}
|
||||
|
||||
export function getSourcesFromChunks(chunks: Chunk[]): Source[] {
|
||||
const sources = aggregateSourcesFromChunks(chunks);
|
||||
return sources.map((source) => sortChunksInSourceByOrder(source));
|
||||
}
|
||||
|
||||
export function buildContextFromOrderedChunks(
|
||||
chunks: Chunk[],
|
||||
citationNumber: number
|
||||
): string {
|
||||
if (chunks.length === 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
let context = "";
|
||||
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
const chunk = chunks[i];
|
||||
|
||||
if (i === 0 || chunk.pre_context !== chunks[i - 1].post_context) {
|
||||
context += chunk.pre_context;
|
||||
}
|
||||
|
||||
context += " " + chunk.text + ` [${citationNumber}] `;
|
||||
|
||||
if (
|
||||
i === chunks.length - 1 ||
|
||||
chunk.post_context !== chunks[i + 1].pre_context
|
||||
) {
|
||||
context += chunk.post_context;
|
||||
}
|
||||
|
||||
if (i < chunks.length - 1) {
|
||||
context += "\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
return context.trim();
|
||||
}
|
||||
|
||||
/**
 * Renders one source as an <excerpt-from-source> block for the LLM prompt:
 * source number, name, description, citation instructions, and the stitched
 * excerpt text from buildContextFromOrderedChunks.
 *
 * @param source - The source whose chunks are rendered.
 * @param citationNumber - 1-based number used both as the heading number and
 *   as the link text in the suggested [n](url) markdown citation.
 */
export function getContextFromSource(
  source: Source,
  citationNumber: number
): string {
  return `
<excerpt-from-source>
# Source ${citationNumber}
## Source Name
${source.source_name}
## Source Description
${source.source_description}
## Source Citation
If you use this source, cite it using a markdown link with the source number as the link text, as follows: [${citationNumber}](${source.source_url})
## Excerpt from Source
${buildContextFromOrderedChunks(source.chunks, citationNumber)}
</excerpt-from-source>
`;
}
|
||||
|
||||
export function getContextFromSources(sources: Source[]): string {
|
||||
return sources
|
||||
.map((source, index) => getContextFromSource(source, index + 1))
|
||||
.join("\n\n\n");
|
||||
}
|
||||
|
||||
export function getCitationsFromSources(sources: Source[]): Citation[] {
|
||||
return sources.map((source) =>
|
||||
citationSchema.parse({
|
||||
source_url: source.source_url,
|
||||
source_description: source.source_description,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
export function searchResultsToChunks(results: any): Chunk[] {
|
||||
let records: any[] = [];
|
||||
|
||||
if (Array.isArray(results)) {
|
||||
records = results;
|
||||
} else if (results?.result?.hits && Array.isArray(results.result.hits)) {
|
||||
records = results.result.hits;
|
||||
} else if (results?.records && Array.isArray(results.records)) {
|
||||
records = results.records;
|
||||
} else if (results?.matches && Array.isArray(results.matches)) {
|
||||
records = results.matches;
|
||||
} else if (results?.data && Array.isArray(results.data)) {
|
||||
records = results.data;
|
||||
} else {
|
||||
console.warn("searchResultsToChunks - Invalid results structure:", {
|
||||
hasResults: !!results,
|
||||
isArray: Array.isArray(results),
|
||||
hasResultHits: !!(results && results.result && results.result.hits),
|
||||
hasRecords: !!(results && results.records),
|
||||
hasMatches: !!(results && results.matches),
|
||||
hasData: !!(results && results.data),
|
||||
resultsKeys: results ? Object.keys(results) : [],
|
||||
resultsType: typeof results
|
||||
});
|
||||
return [];
|
||||
}
|
||||
|
||||
return records
|
||||
.map((record: any, index: number) => {
|
||||
const fields = record.fields || record.values || record.data || {};
|
||||
const metadata = record.metadata || {};
|
||||
|
||||
let classNo: number | undefined = undefined;
|
||||
const classNoValue = fields.class_no !== undefined ? fields.class_no : (metadata.class_no !== undefined ? metadata.class_no : undefined);
|
||||
if (classNoValue !== undefined && classNoValue !== null && classNoValue !== "") {
|
||||
const parsed = typeof classNoValue === 'string' ? parseInt(classNoValue, 10) : classNoValue;
|
||||
if (!isNaN(parsed)) {
|
||||
classNo = parsed;
|
||||
}
|
||||
}
|
||||
|
||||
const chunkData = {
|
||||
pre_context: fields.pre_context || metadata.pre_context || "",
|
||||
text: fields.chunk_text || fields.text || metadata.chunk_text || metadata.text || record.text || "",
|
||||
post_context: fields.post_context || metadata.post_context || "",
|
||||
chunk_type: (fields.chunk_type || metadata.chunk_type || "text") as "image" | "text",
|
||||
source_url: fields.source_url || metadata.source_url || "",
|
||||
source_description: fields.source_description || metadata.source_description || "",
|
||||
source_name: fields.source_name || metadata.source_name || "",
|
||||
order: fields.order !== undefined ? fields.order : (metadata.order !== undefined ? metadata.order : 0),
|
||||
};
|
||||
|
||||
try {
|
||||
const parsed = chunkSchema.parse(chunkData);
|
||||
return parsed;
|
||||
} catch (error) {
|
||||
return null;
|
||||
}
|
||||
})
|
||||
.filter((chunk: Chunk | null): chunk is Chunk => chunk !== null);
|
||||
}
|
||||
|
||||
export function stripCitationsFromText(text: string): string {
|
||||
return text.replace(/\[\d+\]/g, "").trim();
|
||||
}
|
||||
|
||||
6
lib/utils.ts
Normal file
6
lib/utils.ts
Normal file
@ -0,0 +1,6 @@
|
||||
import { clsx, type ClassValue } from "clsx"
|
||||
import { twMerge } from "tailwind-merge"
|
||||
|
||||
export function cn(...inputs: ClassValue[]) {
|
||||
return twMerge(clsx(inputs))
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user