From 46a3416526f30c87b2a4ec9247cd29de9f1d6ced Mon Sep 17 00:00:00 2001 From: Joey Eamigh <55670930+JoeyEamigh@users.noreply.github.com> Date: Sun, 5 Apr 2026 11:42:21 -0400 Subject: [PATCH] fixing backfill/live data bug --- scripts/backfill.ts | 10 +++++-- src/app/api/ingest/electricity/route.ts | 39 +++++++++---------------- src/app/api/ingest/generation/route.ts | 8 ++++- 3 files changed, 28 insertions(+), 29 deletions(-) diff --git a/scripts/backfill.ts b/scripts/backfill.ts index ba2cfef..db53554 100644 --- a/scripts/backfill.ts +++ b/scripts/backfill.ts @@ -30,7 +30,7 @@ const prisma = new PrismaClient({ adapter }); // --------------------------------------------------------------------------- /** EIA RTO hourly data begins 2019-01-01 for most ISOs */ -const BACKFILL_START = '2019-01-01'; +const BACKFILL_START = process.env.BACKFILL_START ?? '2019-01-01'; const ALL_REGIONS: RegionCode[] = [ 'PJM', @@ -323,11 +323,15 @@ async function backfillElectricity(): Promise { const chunks = generateQuarterChunks(BACKFILL_START, end); log(` ${chunks.length} quarterly chunks from ${BACKFILL_START} to ${end}`); - // Fetch retail prices upfront (one call covers all months + all states) + // Fetch retail prices upfront (one call covers all months + all states). + // Always start at least 12 months before the backfill window to guarantee we + // pick up the most recently reported monthly data (EIA retail lags by months). const retailPriceByRegionMonth = new Map(); log(' Fetching retail electricity prices...'); try { - const startMonth = BACKFILL_START.slice(0, 7); + const backfillDate = new Date(`${BACKFILL_START}T00:00:00Z`); + backfillDate.setUTCMonth(backfillDate.getUTCMonth() - 12); + const startMonth = backfillDate.toISOString().slice(0, 7); const endMonth = end.slice(0, 7); const retailPrices = await getRetailElectricityPrices({ start: startMonth, end: endMonth }); for (const rp of retailPrices) { diff --git a/src/app/api/ingest/electricity/route.ts b/src/app/api/ingest/electricity/route.ts index 7cc199e..c394969 100644 --- a/src/app/api/ingest/electricity/route.ts +++ b/src/app/api/ingest/electricity/route.ts @@ -71,10 +71,17 @@ export async function GET(request: NextRequest): Promise { const regionIdByCode = new Map(gridRegions.map(r => [r.code, r.id])); // Fetch retail electricity prices (monthly) to apply to hourly records. + // EIA monthly retail data lags by several months, so we always fetch a wide + // window (6 months back) to guarantee we get the most recently reported month. // Key: "REGION:YYYY-MM" -> $/MWh const retailPriceByRegionMonth = new Map(); try { - const retailPrices = await getRetailElectricityPrices({ start, end }); + const retailStart = (() => { + const d = new Date(); + d.setUTCMonth(d.getUTCMonth() - 6); + return d.toISOString().slice(0, 10); + })(); + const retailPrices = await getRetailElectricityPrices({ start: retailStart, end }); for (const rp of retailPrices) { retailPriceByRegionMonth.set(`${rp.regionCode}:${rp.period}`, rp.priceMwh); } @@ -93,29 +100,6 @@ export async function GET(request: NextRequest): Promise { } } - // If API returned no prices (e.g. recent months not yet reported), - // fall back to the last known non-zero price per region from the database. - if (retailPriceByRegionMonth.size === 0) { - const dbFallbacks = await prisma.$queryRaw>` - SELECT r.code, ep.price_mwh - FROM electricity_prices ep - JOIN grid_regions r ON ep.region_id = r.id - WHERE ep.price_mwh > 0 - AND (r.code, ep.timestamp) IN ( - SELECT r2.code, MAX(ep2.timestamp) - FROM electricity_prices ep2 - JOIN grid_regions r2 ON ep2.region_id = r2.id - WHERE ep2.price_mwh > 0 - GROUP BY r2.code - ) - `; - for (const row of dbFallbacks) { - if (!latestPriceByRegion.has(row.code)) { - latestPriceByRegion.set(row.code, row.price_mwh); - } - } - } - for (const regionCode of regions) { const regionId = regionIdByCode.get(regionCode); if (!regionId) { @@ -159,8 +143,13 @@ export async function GET(request: NextRequest): Promise { for (const point of validPoints) { const month = point.timestamp.toISOString().slice(0, 7); - const basePrice = + const rawBasePrice = retailPriceByRegionMonth.get(`${regionCode}:${month}`) ?? latestPriceByRegion.get(regionCode) ?? 0; + // Cap the base price to a sane maximum. US industrial retail prices never exceed + // ~$500/MWh even in extreme markets. This prevents runaway values if bad data + // somehow leaks into the retail price source. + const MAX_RETAIL_BASE_MWH = 500; + const basePrice = Math.min(rawBasePrice, MAX_RETAIL_BASE_MWH); // Add demand-based variation: scale price between 0.8x and 1.2x based on demand const demandRatio = peakDemand > 0 ? point.valueMw / peakDemand : 0.5; const priceMwh = basePrice > 0 ? basePrice * (0.8 + 0.4 * demandRatio) : 0; diff --git a/src/app/api/ingest/generation/route.ts b/src/app/api/ingest/generation/route.ts index 5bb2e83..5ad2af2 100644 --- a/src/app/api/ingest/generation/route.ts +++ b/src/app/api/ingest/generation/route.ts @@ -79,7 +79,13 @@ export async function GET(request: NextRequest): Promise { try { const fuelData = await getFuelTypeData(regionCode, { start, end }); - const validPoints = fuelData.filter((p): p is typeof p & { generationMw: number } => p.generationMw !== null); + // Cap at 500 GW — no single region/fuel combo should ever exceed this. + // EIA occasionally returns garbage values (overflow, bad readings). + const MAX_GENERATION_MW = 500_000; + const validPoints = fuelData.filter( + (p): p is typeof p & { generationMw: number } => + p.generationMw !== null && p.generationMw >= 0 && p.generationMw <= MAX_GENERATION_MW, + ); if (validPoints.length === 0) continue;