// link-stack/apps/bridge-worker/tasks/import-leafcutter.ts
// (TypeScript, 175 lines, 4.7 KiB — file-viewer header captured together with the source)

/* eslint-disable camelcase */
import fetch from "node-fetch";
import { URLSearchParams } from "url";
import { withDb, AppDatabase } from "../db";
// 2024-04-21 09:44:30 +02:00 (revision timestamp captured from the file viewer; not code)
import { loadConfig } from "@digiresilience/bridge-config";
/**
 * A Label Studio task, restricted to the fields this importer reads.
 */
type LabelStudioTicket = {
  /** Task identifier; copied unchanged into the outgoing ticket's `id`. */
  id: string;
  /** Only tasks with this flag set are forwarded. */
  is_labeled: boolean;
  /** Annotation objects; the `result` entries of each one are scanned for tags. */
  annotations: Record<string, unknown>[];
  /**
   * Task payload. The importer reads `source_id`, `source_created_at` and
   * `source_updated_at` from it.
   */
  data: Record<string, unknown>;
  /** Timestamp string, parsed with `new Date(...)` to select recently updated tasks. */
  updated_at: string;
};
/**
 * The ticket document posted to the Leafcutter endpoint.
 */
type LeafcutterTicket = {
  /** Identifier taken from the Label Studio task. */
  id: string;
  /** Choices collected from the "incidentType tag" control. */
  incident: string[];
  /** Choices collected from the "platform tag" control. */
  technology: string[];
  /** Choices collected from the "targetedGroup tag" control. */
  targeted_group: string[];
  /** Choices collected from the "country tag" control. */
  country: string[];
  /** Always sent as an empty array by this importer. */
  region: string[];
  /** Always sent as an empty array by this importer. */
  continent: string[];
  /** `source_created_at` parsed into a `Date`. */
  date: Date;
  /** The configured contributor id. */
  origin: string;
  /** The task's `source_id`. */
  origin_id: string;
  /** Creation timestamp string copied from the task data. */
  source_created_at: string;
  /** Update timestamp string copied from the task data. */
  source_updated_at: string;
};
// 2024-04-21 09:44:30 +02:00 (revision timestamp captured from the file viewer; not code)
/**
 * Fetches one page (50 tasks) of project 1 from the Label Studio API.
 *
 * @param page - Page number sent as the `page` query parameter.
 * @returns The tasks on that page. An empty array is returned when the server
 *   answers 404 or when the response body is not a JSON array, so callers can
 *   use "empty page" as the end-of-pagination signal.
 * @throws Error when the server answers with any other non-2xx status, so an
 *   authentication or server failure is not mistaken for "no more tasks".
 */
const getLabelStudioTickets = async (
  page: number,
): Promise<LabelStudioTicket[]> => {
  const {
    leafcutter: { labelStudioApiUrl, labelStudioApiKey },
  } = await loadConfig();
  const headers = {
    Authorization: `Token ${labelStudioApiKey}`,
    Accept: "application/json",
  };
  const ticketsQuery = new URLSearchParams({
    page_size: "50",
    page: `${page}`,
  });
  const url = `${labelStudioApiUrl}/projects/1/tasks?${ticketsQuery}`;
  console.log({ url });

  const res = await fetch(url, { headers });

  // NOTE(review): Label Studio is expected to answer 404 for a page past the
  // last one — confirm against the deployed version.
  if (res.status === 404) {
    return [];
  }
  if (!res.ok) {
    throw new Error(
      `Label Studio request failed with ${res.status} ${res.statusText}: ${url}`,
    );
  }

  const tasksResult: unknown = await res.json();
  if (!Array.isArray(tasksResult)) {
    // An unexpected body shape is treated as "no tasks" rather than being
    // handed to callers under a type it does not satisfy.
    console.log({ url, unexpectedBodyType: typeof tasksResult });
    return [];
  }
  // The element shape is trusted here; the API response is not schema-validated.
  return tasksResult as LabelStudioTicket[];
};
// 2024-04-21 09:44:30 +02:00 (revision timestamp captured from the file viewer; not code)
/**
 * Walks the Label Studio task pages in order and collects every task whose
 * `updated_at` is strictly later than `minUpdatedTimestamp`.
 *
 * Pagination stops at the first page that is empty (or not an array), or after
 * 10000 pages, whichever comes first.
 *
 * @param minUpdatedTimestamp - Exclusive lower bound for `updated_at`.
 * @returns The matching tasks, in the order the pages returned them.
 */
const fetchFromLabelStudio = async (
  minUpdatedTimestamp: Date,
): Promise<LabelStudioTicket[]> => {
  // Safety cap so a server that never returns an empty page cannot loop forever.
  const maxPages = 10000;
  const allDocs: LabelStudioTicket[] = [];

  // Pages are requested one after another on purpose: the first empty page is
  // the only signal for when to stop.
  for (let page = 1; page <= maxPages; page += 1) {
    const docs = await getLabelStudioTickets(page);
    if (!Array.isArray(docs) || docs.length === 0) {
      break;
    }

    let kept = 0;
    for (const doc of docs) {
      // An unparsable `updated_at` yields an Invalid Date, which compares
      // false and is therefore skipped.
      if (new Date(doc.updated_at) > minUpdatedTimestamp) {
        allDocs.push(doc);
        kept += 1;
      }
    }
    // Log counts only; task payloads are not written to the log.
    console.log({ page, received: docs.length, kept });
  }

  console.log({ totalKept: allDocs.length });
  return allDocs;
};
/**
 * Collects the `value.choices` of every annotation result whose `from_name`
 * equals `name`. Entries without a `choices` array contribute nothing.
 */
const collectChoices = (results: unknown[], name: string): string[] =>
  results.flatMap((entry) => {
    const { from_name: fromName, value } = (entry ?? {}) as {
      from_name?: unknown;
      value?: { choices?: unknown } | null;
    };
    if (fromName !== name) {
      return [];
    }
    const choices = value?.choices;
    return Array.isArray(choices) ? (choices as string[]) : [];
  });

/** Converts one labeled Label Studio task into the Leafcutter ticket shape. */
const toLeafcutterTicket = (
  ticket: LabelStudioTicket,
  contributorId: string,
): LeafcutterTicket => {
  const {
    id,
    annotations,
    data: { source_id, source_created_at, source_updated_at },
  } = ticket;
  // Flatten the `result` arrays of all annotations; annotations without a
  // `result` array are ignored instead of crashing the whole batch.
  const results = annotations.flatMap(({ result }) =>
    Array.isArray(result) ? result : [],
  );
  return {
    id,
    incident: collectChoices(results, "incidentType tag"),
    technology: collectChoices(results, "platform tag"),
    targeted_group: collectChoices(results, "targetedGroup tag"),
    country: collectChoices(results, "country tag"),
    region: [],
    continent: [],
    date: new Date(source_created_at as string),
    origin: contributorId,
    origin_id: source_id as string,
    source_created_at: source_created_at as string,
    source_updated_at: source_updated_at as string,
  };
};

/**
 * Posts the labeled tickets to the configured Leafcutter endpoint as
 * `{ tickets: [...] }`, using HTTP Basic authentication.
 *
 * Unlabeled tickets are dropped. When nothing is left to send, no request is made.
 *
 * @param tickets - Tasks fetched from Label Studio.
 * @throws Error when the endpoint answers with a non-2xx status, so the caller
 *   does not treat a failed upload as a successful import.
 */
const sendToLeafcutter = async (tickets: LabelStudioTicket[]) => {
  const {
    leafcutter: {
      contributorId,
      opensearchApiUrl,
      opensearchUsername,
      opensearchPassword,
    },
  } = await loadConfig();

  const finalTickets: LeafcutterTicket[] = tickets
    .filter((ticket) => ticket.is_labeled)
    .map((ticket) => toLeafcutterTicket(ticket, contributorId));

  // Log counts only; ticket payloads are not written to the log.
  console.log({ received: tickets.length, labeled: finalTickets.length });
  if (finalTickets.length === 0) {
    return;
  }

  console.log("Sending to Leafcutter");
  const credentials = Buffer.from(
    `${opensearchUsername}:${opensearchPassword}`,
  ).toString("base64");
  const result = await fetch(opensearchApiUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Basic ${credentials}`,
    },
    body: JSON.stringify({ tickets: finalTickets }),
  });

  if (!result.ok) {
    throw new Error(
      `Leafcutter upload failed with ${result.status} ${result.statusText}`,
    );
  }
  console.log({ status: result.status });
};
/**
 * Imports recently updated Label Studio tasks into Leafcutter.
 *
 * The lower time bound is read from the `<contributorName>ImportLeafcutterTask`
 * setting (`value.minUpdatedTimestamp`), defaulting to 2023-03-01 when the
 * setting is missing or unparsable. After the tickets have been fetched and
 * sent, the setting is overwritten with the time this run started.
 *
 * The returned promise settles only after the database callback has finished,
 * and rejects if any step inside it fails.
 */
const importLeafcutterTask = async (): Promise<void> => {
  // Awaited so the task does not report completion before the import is done
  // and so failures propagate to the caller instead of being dropped.
  await withDb(async (db: AppDatabase) => {
    const {
      leafcutter: { contributorName },
    } = await loadConfig();
    const settingName = `${contributorName}ImportLeafcutterTask`;
    // The settings record's type is not visible from this file, hence `any`.
    const res: any = await db.settings.findByName(settingName);

    const defaultStart = new Date("2023-03-01");
    const storedStart = res?.value?.minUpdatedTimestamp
      ? new Date(res.value.minUpdatedTimestamp as string)
      : defaultStart;
    // An Invalid Date compares false against everything and would silently
    // match no tickets, so fall back to the default instead.
    const startTimestamp = Number.isNaN(storedStart.getTime())
      ? defaultStart
      : storedStart;

    // Captured before fetching, so anything updated while this run is in
    // progress still falls after the stored checkpoint.
    const newLastTimestamp = new Date();
    console.log({
      contributorName,
      settingName,
      res,
      startTimestamp,
      newLastTimestamp,
    });

    const tickets = await fetchFromLabelStudio(startTimestamp);
    console.log({ fetched: tickets.length });
    await sendToLeafcutter(tickets);

    // Reached only when fetching and sending both completed without throwing.
    await db.settings.upsert(settingName, {
      minUpdatedTimestamp: newLastTimestamp,
    });
  });
};
export default importLeafcutterTask;