diff --git a/.dvc-store.dvc b/.dvc-store.dvc new file mode 100644 index 0000000..beaedb2 --- /dev/null +++ b/.dvc-store.dvc @@ -0,0 +1,6 @@ +outs: +- md5: c633654a20f23d76af34689f7e27d58a.dir + size: 729964105 + nfiles: 111 + hash: md5 + path: .dvc-store diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 0000000..528f30c --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 0000000..21df50b --- /dev/null +++ b/.dvc/config @@ -0,0 +1,9 @@ +[core] + analytics = false + remote = r2 +['remote "r2"'] + url = s3://share/sec-cybert + endpointurl = https://0a665ba1f35a38354b3f623be13f14bd.r2.cloudflarestorage.com + region = auto +['remote "public"'] + url = https://share.lightningcode.dev/sec-cybert diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 0000000..5197305 --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..731f10e --- /dev/null +++ b/.env.example @@ -0,0 +1,10 @@ +# OpenRouter (GenAI labeling pipeline) +OPENROUTER_API_KEY="" + +# Cloudflare R2 (DVC data storage) +R2_BUCKET="share" +R2_ENDPOINT="https://0a665ba1f35a38354b3f623be13f14bd.r2.cloudflarestorage.com" +R2_PUBLIC_URL="https://share.lightningcode.dev" +R2_API_TOKEN="" +R2_ACCESS_KEY_ID="" +R2_SECRET_ACCESS_KEY="" diff --git a/.gitignore b/.gitignore index 425ee1a..97532fb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,9 @@ -# Data (too large for git) +# Data (too large for git — managed by DVC) data/ models/ checkpoints/ +.dvc-store/ +*.tar.zst # Dependencies ts/node_modules/ @@ -52,3 +54,4 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json # Finder (MacOS) folder config .DS_Store python/*.whl +/.dvc-store diff --git a/CLAUDE.md b/CLAUDE.md index a35aeb3..78379c2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -55,6 +55,14 @@ All commands run from repo root via `bun run