Skip to content

Commit

Permalink
see #160 : fetch all data from HAL
Browse files Browse the repository at this point in the history
  • Loading branch information
Guerik committed Nov 12, 2024
1 parent cf653ad commit 3faee62
Show file tree
Hide file tree
Showing 11 changed files with 163 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ export const createGetCompiledData = (db: Kysely<Database>) => async (): Promise
}): CompiledData.Software<"private"> => {
return {
...stripNullOrUndefinedValues(software),
addedByAgentEmail: agentById[addedByAgentId].email,
addedByAgentEmail: addedByAgentId ? agentById[addedByAgentId].email : undefined,
updateTime: new Date(+updateTime).getTime(),
referencedSinceTime: new Date(+referencedSinceTime).getTime(),
doRespectRgaa,
Expand Down
2 changes: 1 addition & 1 deletion api/src/core/adapters/dbApi/kysely/kysely.database.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ type SoftwaresTable = {
>;
categories: JSONColumnType<string[]>;
generalInfoMd: string | null;
addedByAgentId: number;
addedByAgentId: number | null;
logoUrl: string | null;
keywords: JSONColumnType<string[]>;
};
Expand Down
50 changes: 48 additions & 2 deletions api/src/core/adapters/hal/getHalSoftware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ export async function fetchHalSoftwareById(halDocid: string): Promise<HalRawSoft
`https://api.archives-ouvertes.fr/search/?q=docid:${halDocid}&wt=json&fl=${halSoftwareFieldsToReturnAsString}&sort=docid%20asc`
).catch(() => undefined);

console.log("Hal response status : ", res?.status);

if (res === undefined) {
throw new HalFetchError(undefined);
}
Expand All @@ -59,3 +57,51 @@ export async function fetchHalSoftwareById(halDocid: string): Promise<HalRawSoft

return json.response.docs[0];
}

/**
 * Fetches the `docid` of the HAL documents matching `docType_s:SOFTWARE`
 * (a single request capped at `rows=10000`).
 *
 * A `429` response is retried after a short, linearly growing pause, up to
 * `maxAttempts` requests in total, instead of recursing without bound.
 *
 * @returns The `docid` of each document found in `response.docs`.
 * @throws HalFetchError with `undefined` when the request itself fails (network error),
 *         or with the HTTP status when HAL answers with a non-2xx status
 *         (including a `429` that persists after the last attempt).
 */
export async function fetchHalSoftwaresIds(): Promise<Array<string>> {
    const url = `https://api.archives-ouvertes.fr/search/?q=docType_s:SOFTWARE&rows=10000&fl=docid`;
    const maxAttempts = 10;
    const retryDelayMs = 100;

    for (let attempt = 1; ; attempt++) {
        const res = await fetch(url).catch(() => undefined);

        if (res === undefined) {
            throw new HalFetchError(undefined);
        }

        // Rate limited: wait a bit longer on each attempt, then ask again.
        if (res.status === 429 && attempt < maxAttempts) {
            await new Promise(resolve => setTimeout(resolve, retryDelayMs * attempt));
            continue;
        }

        // Any non-2xx answer (404, 5xx, exhausted 429, ...) has no usable `response.docs`.
        if (!res.ok) {
            throw new HalFetchError(res.status);
        }

        const json = await res.json();

        return json.response.docs.map((doc: { docid: string }) => doc.docid);
    }
}

/**
 * Fetches the HAL documents matching `docType_s:SOFTWARE` (a single request
 * capped at `rows=10000`), restricted to the fields listed in
 * `halSoftwareFieldsToReturnAsString`.
 *
 * A `429` response is retried after a short, linearly growing pause, up to
 * `maxAttempts` requests in total, instead of recursing without bound.
 *
 * @returns The content of `response.docs` from the HAL answer.
 * @throws HalFetchError with `undefined` when the request itself fails (network error),
 *         or with the HTTP status when HAL answers with a non-2xx status
 *         (including a `429` that persists after the last attempt).
 */
export async function fetchHalSoftwares(): Promise<Array<HalRawSoftware>> {
    const url = `https://api.archives-ouvertes.fr/search/?q=docType_s:SOFTWARE&rows=10000&fl=${halSoftwareFieldsToReturnAsString}`;
    const maxAttempts = 10;
    const retryDelayMs = 100;

    for (let attempt = 1; ; attempt++) {
        const res = await fetch(url).catch(() => undefined);

        console.debug("Hal response status : ", res?.status);

        if (res === undefined) {
            throw new HalFetchError(undefined);
        }

        // Rate limited: wait a bit longer on each attempt, then ask again.
        if (res.status === 429 && attempt < maxAttempts) {
            await new Promise(resolve => setTimeout(resolve, retryDelayMs * attempt));
            continue;
        }

        // Any non-2xx answer (404, 5xx, exhausted 429, ...) has no usable `response.docs`.
        if (!res.ok) {
            throw new HalFetchError(res.status);
        }

        const json = await res.json();

        return json.response.docs;
    }
}
62 changes: 53 additions & 9 deletions api/src/core/adapters/hal/halRawSoftware.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
import { Language, SoftwareExternalData } from "../../ports/GetSoftwareExternalData";
import { SoftwareExternalDataOption } from "../../ports/GetSoftwareExternalDataOptions";
import { SoftwareFormData } from "../../usecases/readWriteSillData";
import { parseBibliographicFields } from "./parseBibliographicFields";

/**
 * Shape of a HAL search result envelope, generic over the type of `docs`.
 *
 * NOTE(review): presumably mirrors the `response` object of the HAL search API
 * JSON answer (the fetchers read `json.response.docs`) — confirm against the API.
 */
export type HalAPIResponse<T> = {
// presumably the total number of documents matching the query — TODO confirm
numFound: number;
// presumably the offset of the first returned document — TODO confirm
start: number;
maxScore: number;
numFoundExact: boolean;
// The returned documents; `T` is the whole collection type (e.g. an array), not one item.
docs: T
}

const halSoftwareFieldsToReturn: (keyof HalRawSoftware)[] = [
"en_abstract_s",
"en_title_s",
Expand All @@ -20,7 +29,7 @@ export const halSoftwareFieldsToReturnAsString = halSoftwareFieldsToReturn.join(

export const rawHalSoftwareToSoftwareExternalData = (halSoftware: HalRawSoftware): SoftwareExternalData => {
const bibliographicReferences = parseBibliographicFields(halSoftware.label_bibtex);
const license = bibliographicReferences.license.join(", ");
const license = bibliographicReferences.license ?bibliographicReferences.license.join(", ") : undefined;
const developers = bibliographicReferences.author.map(author => ({
id: author.toLowerCase().split(" ").join("-"),
name: author
Expand Down Expand Up @@ -78,9 +87,9 @@ export type HalRawSoftware = {
title_s: string[];
en_title_s?: string[];
fr_title_s?: string[];
abstract_s?: string[];
en_abstract_s?: string[];
fr_abstract_s?: string[];
abstract_s?: string[]; // 1030 / 1398
en_abstract_s?: string[]; // 896 / 1398
fr_abstract_s?: string[]; // 235 / 1398
uri_s: string;
openAccess_bool: boolean;
docType_s: string;
Expand All @@ -102,9 +111,9 @@ export type HalRawSoftware = {
// es_domainAllCodeLabel_fs: string[];
// eu_domainAllCodeLabel_fs: string[];
// primaryDomain_s: string;
// en_keyword_s?: string[];
// keyword_s: string[];
// fr_keyword_s?: string[];
// en_keyword_s?: string[]; // 711 / 1398
// keyword_s: string[]; // 786 / 1398
// fr_keyword_s?: string[]; // 184 / 1398
// authIdFormPerson_s: string[];
// authIdForm_i: number[];
// authLastName_s: string[];
Expand Down Expand Up @@ -159,7 +168,7 @@ export type HalRawSoftware = {
// contributorFullName_s: string;
// contributorIdFullName_fs: string;
// contributorFullNameId_fs: string;
// language_s: string[];
// language_s: string[]; // Could use
// halId_s: string;
// version_i: number;
// status_i: number;
Expand Down Expand Up @@ -209,9 +218,44 @@ export type HalRawSoftware = {
// collCategoryCodeName_fs: string[];
// collNameCode_fs: string[];
// fileMain_s: string;
// files_s: string[];
// files_s: string[]; // Could contain zip code
// fileType_s: string[];
// _version_: bigint;
// dateLastIndexed_tdate: string;
// label_xml: string;
// softCodeRepository_s: string[]; // 727 / 1398
// softDevelopmentStatus_s: string[]; // 715 / 1398
// softPlatform_s:string[]; // 449 / 1398
// softProgrammingLanguage_s: string[]; // 929 / 1398
// softRuntimePlatform_s: string[]; // 195 / 1398
// softVersion_s: string[]; // 642 / 1398
// licence_s: string[]; // default licence ? -> 20 / 1398
};

/**
 * Builds a `SoftwareFormData` from a raw HAL software record.
 *
 * Only the name, description, external id and license come from HAL; every
 * other field is a hard-coded placeholder (see the TODO markers) until the
 * real mapping is defined.
 *
 * @param halSoftware Raw record as returned by the HAL search API.
 * @returns Form data ready to be handed to the software repository.
 */
export const HalRawSoftwareToSoftwareForm = (halSoftware: HalRawSoftware): SoftwareFormData => {
    const bibliographicReferences = parseBibliographicFields(halSoftware.label_bibtex);
    const license = bibliographicReferences.license ? bibliographicReferences.license.join(", ") : undefined;

    // TODO Mapping
    const formData: SoftwareFormData = {
        softwareName: halSoftware.title_s[0],
        // `abstract_s` may be missing or empty; fall back to "" in both cases so the
        // description is always a string.
        softwareDescription: halSoftware.abstract_s?.[0] ?? "",
        softwareType: {
            type: "desktop/mobile",
            os: { "linux": true, "windows": false, "android": false, "ios": false, "mac": false }
        }, // TODO
        externalId: halSoftware.docid,
        comptoirDuLibreId: undefined,
        // `||` on purpose: an empty joined license string must also fall back to the default.
        softwareLicense: license || "copyright", // TODO
        softwareMinimalVersion: "1", // TODO
        similarSoftwareExternalDataIds: [],
        softwareLogoUrl: "https://www.gnu.org/graphics/gnu-head-30-years-anniversary.svg",
        softwareKeywords: [],

        isPresentInSupportContract: false,
        isFromFrenchPublicService: false, // TODO comment
        doRespectRgaa: null
    };

    return formData;
};
7 changes: 7 additions & 0 deletions api/src/core/adapters/hal/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import { fetchHalSoftwares } from "./getHalSoftware";

/**
 * Entry point of the HAL adapter.
 * `software.getAll` is `fetchHalSoftwares`.
 */
const halAdapter = {
    software: {
        getAll: fetchHalSoftwares
    }
};

export default halAdapter;
17 changes: 17 additions & 0 deletions api/src/core/bootstrap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import type { UserApi } from "./ports/UserApi";
import { UseCases } from "./usecases";
import { makeGetAgent } from "./usecases/getAgent";
import { makeGetSoftwareFormAutoFillDataFromExternalAndOtherSources } from "./usecases/getSoftwareFormAutoFillDataFromExternalAndOtherSources";
import { feedDBfromHAL } from "./usecases/feedDB";
import { env as config} from "../env";

type PgDbConfig = { dbKind: "kysely"; kyselyDb: Kysely<Database> };

Expand Down Expand Up @@ -113,6 +115,21 @@ export async function bootstrapCore(
await initializeUserApiCache();
}

if (config.feedFromSource) {
if (config.externalSoftwareDataOrigin === 'HAL') {
console.log(' ------ Feeding database with HAL software started ------');
const HAL = feedDBfromHAL(dbApi);
try {
await HAL();
} catch(err) {
// catches errors both in fetch and response.json
console.error(err);
}

console.log(' ------ Feeding database with HAL software finished ------');
}
}

if (doPerPerformPeriodicalCompilation) {
const frequencyOfUpdate = 1000 * 60 * 60 * 4; // 4 hours

Expand Down
2 changes: 1 addition & 1 deletion api/src/core/ports/CompileData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ export namespace CompiledData {
};

export type Private = Common & {
addedByAgentEmail: string;
addedByAgentEmail: string | undefined;
users: (Pick<Db.AgentRow, "organization"> &
Pick<Db.SoftwareUserRow, "os" | "serviceUrl" | "useCaseDescription" | "version">)[];
referents: (Pick<Db.AgentRow, "email" | "organization"> &
Expand Down
4 changes: 2 additions & 2 deletions api/src/core/ports/DbApiV2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ export interface SoftwareRepository {
params: {
formData: SoftwareFormData;
externalDataOrigin: ExternalDataOrigin;
} & WithAgentId
} & (WithAgentId | {agentId : undefined})
) => Promise<number>;
update: (
params: {
softwareSillId: number;
formData: SoftwareFormData;
} & WithAgentId
} & (WithAgentId | {agentId : undefined})
) => Promise<void>;
updateLastExtraDataFetchAt: (params: { softwareId: number }) => Promise<void>;
getAll: (filters?: GetSoftwareFilters) => Promise<Software[]>;
Expand Down
20 changes: 20 additions & 0 deletions api/src/core/usecases/feedDB.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { DbApiV2 } from "../ports/DbApiV2";
import HAL from "../adapters/hal"
import { HalRawSoftwareToSoftwareForm } from "../adapters/hal/halRawSoftware";

/**
 * Creates the use case that imports HAL software records into the database.
 *
 * The returned function fetches the records through `HAL.software.getAll()`,
 * converts each one to form data and creates it unless a software with the
 * same name already exists.
 *
 * Records are processed one at a time and every database call is awaited, so:
 *  - the returned promise settles only once the whole import is done;
 *  - a failing lookup or insert rejects that promise (the caller's `try/catch`
 *    sees it) instead of surfacing as an unhandled rejection;
 *  - two HAL records sharing a name cannot both pass the "already exists?"
 *    check and be inserted twice.
 *
 * @param dbApi Database access used to look up and create software entries.
 * @returns An async function resolving to one entry per HAL record, in order:
 *          the `id` read from the existing software, or the value returned by `create`.
 */
export const feedDBfromHAL = (dbApi: DbApiV2) => {
    return async (): Promise<Array<number | undefined>> => {
        const softwares = await HAL.software.getAll();
        const softwareIds: Array<number | undefined> = [];

        for (const software of softwares) {
            const newSoft = HalRawSoftwareToSoftwareForm(software);
            const soft = await dbApi.software.getByName(newSoft.softwareName);

            if (soft) {
                // Already known under that name: keep the existing entry untouched.
                softwareIds.push(soft.id);
                continue;
            }

            console.log('Importing HAL : ', software.docid);
            const createdId = await dbApi.software.create({
                formData: newSoft,
                externalDataOrigin: 'HAL',
                agentId: undefined
            });
            softwareIds.push(createdId);
        }

        return softwareIds;
    };
};
17 changes: 9 additions & 8 deletions api/src/core/usecases/readWriteSillData/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export type ServiceProvider = {
};

export type Software = {
id?: number;
logoUrl: string | undefined;
softwareId: number;
softwareName: string;
Expand Down Expand Up @@ -124,20 +125,20 @@ export type Prerogative = keyof Prerogatives;
export type Os = "windows" | "linux" | "mac" | "android" | "ios";

export type SoftwareFormData = {
softwareType: SoftwareType;
externalId: string | undefined;
comptoirDuLibreId: number | undefined;
softwareName: string;
softwareDescription: string;
softwareLicense: string;
softwareType: SoftwareType;
externalId: string | undefined; // Id
comptoirDuLibreId: number | undefined; // id on the library comptoir du libre
softwareLicense: string; // or default licence ?
softwareMinimalVersion: string;
similarSoftwareExternalDataIds: string[];
similarSoftwareExternalDataIds: string[]; //
softwareLogoUrl: string | undefined;
softwareKeywords: string[];

isPresentInSupportContract: boolean;
isFromFrenchPublicService: boolean;
doRespectRgaa: boolean | null;
isPresentInSupportContract: boolean; // ??
isFromFrenchPublicService: boolean; // Financed and developped by public service
doRespectRgaa: boolean | null; // référentiel général d’amélioration de l’accessibilité
};

export type DeclarationFormData = DeclarationFormData.User | DeclarationFormData.Referent;
Expand Down
6 changes: 4 additions & 2 deletions api/src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ const zConfiguration = z.object({
// Completely disable this instance and redirect to another url
"redirectUrl": z.string().optional(),
"externalSoftwareDataOrigin": z.enum(["wikidata", "HAL"]).optional(),
"databaseUrl": z.string()
"databaseUrl": z.string(),
"feedFromSource": z.boolean(),
});

const getJsonConfiguration = () => {
Expand Down Expand Up @@ -65,7 +66,8 @@ const getJsonConfiguration = () => {
"isDevEnvironnement": process.env.SILL_IS_DEV_ENVIRONNEMENT?.toLowerCase() === "true",
"externalSoftwareDataOrigin": process.env.SILL_EXTERNAL_SOFTWARE_DATA_ORIGIN,
"redirectUrl": process.env.SILL_REDIRECT_URL,
"databaseUrl": process.env.DATABASE_URL
"databaseUrl": process.env.DATABASE_URL,
"feedFromSource": process.env.FEEDFROMSOURCE || false,
};
};

Expand Down

0 comments on commit 3faee62

Please sign in to comment.