Skip to content

Commit

Permalink
add new scrape endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
Ben Sarmiento committed Aug 29, 2023
1 parent fdfb321 commit 91762b9
Show file tree
Hide file tree
Showing 7 changed files with 495 additions and 10 deletions.
8 changes: 2 additions & 6 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,7 @@ GH_PAT=github-access-token
PROXY=socks5h://tor:9050
SEARCH_SPEED_PASSWORD=123456

REDIS_SENTINEL_HOST=redis-sentinel
REDIS_PASSWORD=dmm123456

FIREBASE_PROJECT_ID=project-id
FIREBASE_CLIENT_EMAIL=[email protected]
FIREBASE_PRIVATE_KEY="-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n"
TMDB_KEY=abc123
MDBLIST_KEY=abc123

DATABASE_URL="postgresql://johndoe:randompassword@localhost:5432/mydb?schema=public"
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "debrid-media-manager",
"version": "2.5.10",
"version": "2.6.0",
"private": false,
"scripts": {
"dev": "next dev",
Expand Down
12 changes: 12 additions & 0 deletions prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,15 @@ model Cache {
value Json
updatedAt DateTime @updatedAt
}

model Scraped {
key String @id
value Json
updatedAt DateTime @updatedAt
}

model Search {
key String @id
value Json
updatedAt DateTime @updatedAt
}
196 changes: 196 additions & 0 deletions src/pages/api/scrape.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
import { SearchResult } from '@/services/btdigg-v2';
import {
createAxiosInstance,
scrapeResults,
flattenAndRemoveDuplicates,
groupByParsedTitle,
} from '@/services/btdigg-v2';
import { PlanetScaleCache } from '@/services/planetscale';
import axios from 'axios';
import { NextApiRequest, NextApiResponse } from 'next';
import { SocksProxyAgent } from 'socks-proxy-agent';

const tmdbKey = process.env.TMDB_KEY;
const mdblistKey = process.env.MDBLIST_KEY;
const getTmdbInfo = (imdbId: string) =>
`https://api.themoviedb.org/3/find/${imdbId}?api_key=${tmdbKey}&external_source=imdb_id`;
const getMdbInfo = (imdbId: string) => `https://mdblist.com/api/?apikey=${mdblistKey}&i=${imdbId}`;
function padWithZero(num: number) {
if (num < 10) {
return '0' + num;
} else {
return num.toString();
}
}
const cleanSearchQuery = (search: string): string => {
return search
.split(/[\s\=:\?\.\-\(\)\/]/) // split the search query into an array of elements
.filter((e) => e !== '') // filter out any empty elements
.map((e) => e.toLowerCase()) // convert each element to lowercase
.join(' ') // join the remaining elements with a single space
.replace(/[áàäâ]/g, 'a') // replace certain characters with their equivalent
.replace(/[éèëê]/g, 'e')
.replace(/[íìïî]/g, 'i')
.replace(/[óòöô]/g, 'o')
.replace(/[úùüû]/g, 'u')
.replace(/\s+/g, ' ') // replace multiple spaces with a single space
.trim();
};

const db = new PlanetScaleCache();

export type ScrapeResponse = {
status: string;
errorMessage?: string;
};
1;
export default async function handler(
req: NextApiRequest,
res: NextApiResponse<ScrapeResponse>
) {
const { imdbId, scrapePassword, override } = req.query;
if (process.env.SEARCH_SPEED_PASSWORD && scrapePassword !== process.env.SEARCH_SPEED_PASSWORD) {
res.status(403).json({ status: 'error', errorMessage: 'You are not authorized to use this feature' });
return;
}

if (!imdbId || !(typeof imdbId === 'string')) {
console.log(imdbId);
res.status(400).json({ status: 'error', errorMessage: 'Missing "imdbId" query parameter' });
return;
}

// imdbId to search for
const tmdbResponse = await axios.get(getTmdbInfo(imdbId.toString().trim()));

const movieTitles: string[] = [];
const tvTitles: string[] = [];

let tmdbItem: any = {};
let itemType: 'movie' | 'tv' = 'movie';

if (tmdbResponse.data.movie_results.length > 0) {
if (override && override !== 'true') {
const keyExists = await db.keyExists(`movie:${imdbId}`);
if (keyExists) {
res.status(200).json({ status: 'skipped' });
return;
}
}

itemType = 'movie';
tmdbItem = tmdbResponse.data.movie_results[0];
movieTitles.push(`"${cleanSearchQuery(tmdbItem.title)}"`);
movieTitles.push(`"${cleanSearchQuery(tmdbItem.title)}" ${tmdbItem.release_date.substring(0, 4)}`);

if (tmdbItem.original_title && tmdbItem.original_title !== tmdbItem.title) {
movieTitles.push(`"${tmdbItem.original_title}"`);
movieTitles.push(`"${tmdbItem.original_title}" ${tmdbItem.release_date.substring(0, 4)}`);
const mdbItem = await axios.get(getMdbInfo(imdbId.toString().trim()));
for (let rating of mdbItem.data.ratings) {
if (rating.source === 'tomatoes') {
const cleanedTitle = (
itemType === 'movie' ? rating.url.split('/m/') : rating.url.split('/tv/')
)[1].replaceAll('_', ' ');
movieTitles.push(`"${cleanedTitle}"`);
movieTitles.push(`"${cleanedTitle}" ${tmdbItem.release_date.substring(0, 4)}`);
}
}
}

try {
const results = [];
for (const movieTitle of movieTitles) {
for (const lType of ['720p', '1080p', '2160p', '']) {
results.push(
await scrapeResults(
createAxiosInstance(
new SocksProxyAgent(process.env.PROXY!, { timeout: 10000 })
),
`${movieTitle} ${lType}`.trim(),
lType || '1080p',
)
);
}
}
let processedResults = flattenAndRemoveDuplicates(results);
if (processedResults.length) processedResults = groupByParsedTitle(processedResults);

await db.saveScrapedResults<SearchResult[]>(`movie:${imdbId}`, processedResults);

res.status(200).json({ status: `scraped: ${processedResults.length} items` });
} catch (error: any) {
res.status(500).json({
status: 'error',
errorMessage: `An error occurred while scraping Btdigg (${error.message})`,
});
}
}

if (tmdbResponse.data.tv_results.length > 0) {
if (override && override !== 'true') {
const keyExists = await db.keyExists(`tv:${imdbId}:1`);
if (keyExists) {
res.status(200).json({ status: 'skipped' });
return;
}
}

itemType = 'tv';
tmdbItem = tmdbResponse.data.tv_results[0];
tvTitles.push(`"${cleanSearchQuery(tmdbItem.name)}"`);
tvTitles.push(`"${cleanSearchQuery(tmdbItem.name)}" ${tmdbItem.first_air_date.substring(0, 4)}`);

if (tmdbItem.original_name && tmdbItem.original_name !== tmdbItem.name) {
tvTitles.push(`"${tmdbItem.original_name}"`);
tvTitles.push(`"${tmdbItem.original_name}" ${tmdbItem.first_air_date.substring(0, 4)}`);
}

let totalResultsCount = 0;
const showResponse = await axios.get(getMdbInfo(imdbId.toString().trim()));
for (const season of showResponse.data.seasons
? showResponse.data.seasons
: [{ season_number: 1, episode_count: 0 }]) {
if (season.season_number === 0) continue;
let seasonQueries = tvTitles.map((q) => `${q} "s${padWithZero(season.season_number)}"`);
try {
const results = [];
for (const finalQuery of seasonQueries) {
for (const lType of ['720p', '1080p', '2160p', '']) {
results.push(
await scrapeResults(
createAxiosInstance(
new SocksProxyAgent(process.env.PROXY!, { timeout: 10000 })
),
`${finalQuery} ${lType}`.trim(),
lType || '1080p',
)
);
}
}
let processedResults = flattenAndRemoveDuplicates(results);
if (processedResults.length) processedResults = groupByParsedTitle(processedResults);

await db.saveScrapedResults<SearchResult[]>(`tv:${imdbId}:${season.season_number}`, processedResults);

totalResultsCount += processedResults.length;
} catch (error: any) {
res.status(500).json({
status: 'error',
errorMessage: `An error occurred while scraping Btdigg (${error.message})`,
});
}
// if (season.episode_count === 0) continue;
// for (let i = 1; i <= season.episode_count; i++) {
// seasonQueries = seasonQueries.concat(
// tvTitles.map(
// (q) =>
// `${q} "s${padWithZero(season.season_number)}e${padWithZero(i)}"`
// )
// );
// }
}

res.status(200).json({ status: `scraped: ${totalResultsCount} items` });
}
}
Loading

0 comments on commit 91762b9

Please sign in to comment.