Make html-to-mfm and its callstack async

This commit is contained in:
Laura Hausmann 2023-09-11 22:34:00 +02:00
parent 86defdffd3
commit 552041726b
No known key found for this signature in database
GPG key ID: D044E84C5BE01605
6 changed files with 60 additions and 60 deletions

View file

@ -5,7 +5,7 @@ import { defaultTreeAdapter as treeAdapter } from "parse5";
const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;
const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/;
export function fromHtml(html: string, hashtagNames?: string[]): string {
export async function fromHtml(html: string, hashtagNames?: string[]): Promise<string> {
// some AP servers like Pixelfed use br tags as well as newlines
html = html.replace(/<br\s?\/?>\r?\n/gi, "\n");
@ -14,7 +14,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
let text = "";
for (const n of dom.childNodes) {
analyze(n);
await analyze(n);
}
return text.trim();
@ -31,15 +31,15 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
return "";
}
// Feeds every node in `childNodes` to `analyze`, in order; does nothing when
// `childNodes` is falsy.
// This listing is a diff view: each changed line appears as the pre-change line
// immediately followed by its post-change replacement.
// After the change the helper is async and awaits each `analyze` call before
// moving on to the next node, so nodes are still processed one at a time.
function appendChildren(childNodes: TreeAdapter.ChildNode[]): void {
async function appendChildren(childNodes: TreeAdapter.ChildNode[]): Promise<void> {
if (childNodes) {
for (const n of childNodes) {
analyze(n);
await analyze(n);
}
}
}
function analyze(node: TreeAdapter.Node) {
async function analyze(node: TreeAdapter.Node) {
if (treeAdapter.isTextNode(node)) {
text += node.value;
return;
@ -109,7 +109,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
case "h1": {
text += "【";
appendChildren(node.childNodes);
await appendChildren(node.childNodes);
text += "】\n";
break;
}
@ -117,14 +117,14 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
case "b":
case "strong": {
text += "**";
appendChildren(node.childNodes);
await appendChildren(node.childNodes);
text += "**";
break;
}
case "small": {
text += "<small>";
appendChildren(node.childNodes);
await appendChildren(node.childNodes);
text += "</small>";
break;
}
@ -132,7 +132,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
case "s":
case "del": {
text += "~~";
appendChildren(node.childNodes);
await appendChildren(node.childNodes);
text += "~~";
break;
}
@ -140,7 +140,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
case "i":
case "em": {
text += "<i>";
appendChildren(node.childNodes);
await appendChildren(node.childNodes);
text += "</i>";
break;
}
@ -155,7 +155,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
text += getText(node.childNodes[0]);
text += "\n```\n";
} else {
appendChildren(node.childNodes);
await appendChildren(node.childNodes);
}
break;
}
@ -163,7 +163,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
// inline code (<code>)
case "code": {
text += "`";
appendChildren(node.childNodes);
await appendChildren(node.childNodes);
text += "`";
break;
}
@ -184,7 +184,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
case "h5":
case "h6": {
text += "\n\n";
appendChildren(node.childNodes);
await appendChildren(node.childNodes);
break;
}
@ -197,13 +197,13 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
case "dt":
case "dd": {
text += "\n";
appendChildren(node.childNodes);
await appendChildren(node.childNodes);
break;
}
default: {
// includes inline elements
appendChildren(node.childNodes);
await appendChildren(node.childNodes);
break;
}
}

View file

@ -40,7 +40,7 @@ export async function importMastoPost(
job.progress(60);
let text;
try {
text = htmlToMfm(post.object.content, post.object.tag);
text = await htmlToMfm(post.object.content, post.object.tag);
} catch (e) {
throw e;
}

View file

@ -2,10 +2,10 @@ import type { IObject } from "../type.js";
import { extractApHashtagObjects } from "../models/tag.js";
import { fromHtml } from "../../../mfm/from-html.js";
/**
 * Converts `html` via `fromHtml`, passing along the hashtag names found in `tag`.
 *
 * The names come from `extractApHashtagObjects(tag)`; entries whose `name` is
 * `null` or `undefined` are dropped before the call.
 *
 * This listing is a diff view: each changed line appears as the pre-change line
 * immediately followed by its post-change replacement. After the change the
 * function is `async`, so callers receive a promise and must await it.
 *
 * NOTE(review): `return await` is not required here since there is no
 * surrounding try/catch; returning the promise directly would resolve to the
 * same value.
 */
export function htmlToMfm(html: string, tag?: IObject | IObject[]) {
export async function htmlToMfm(html: string, tag?: IObject | IObject[]) {
const hashtagNames = extractApHashtagObjects(tag)
.map((x) => x.name)
// `!= null` (loose) filters out both null and undefined names.
.filter((x): x is string => x != null);
return fromHtml(html, hashtagNames);
return await fromHtml(html, hashtagNames);
}

View file

@ -313,7 +313,7 @@ export async function createNote(
} else if (typeof note._misskey_content !== "undefined") {
text = note._misskey_content;
} else if (typeof note.content === "string") {
text = htmlToMfm(note.content, note.tag);
text = await htmlToMfm(note.content, note.tag);
}
// vote
@ -575,7 +575,7 @@ export async function updateNote(value: string | IObject, resolver?: Resolver) {
} else if (typeof post._misskey_content !== "undefined") {
text = post._misskey_content;
} else if (typeof post.content === "string") {
text = htmlToMfm(post.content, post.tag);
text = await htmlToMfm(post.content, post.tag);
}
const cw = post.sensitive && post.summary;

View file

@ -234,7 +234,7 @@ export async function createPerson(
}
}
const { fields } = analyzeAttachments(person.attachment || []);
const { fields } = await analyzeAttachments(person.attachment || []);
const tags = extractApHashtags(person.tag)
.map((tag) => normalizeForSearch(tag))
@ -335,7 +335,7 @@ export async function createPerson(
new UserProfile({
userId: user.id,
description: person.summary
? htmlToMfm(truncate(person.summary, summaryLength), person.tag)
? await htmlToMfm(truncate(person.summary, summaryLength), person.tag)
: null,
url: url,
fields,
@ -481,7 +481,7 @@ export async function updatePerson(
const emojiNames = emojis.map((emoji) => emoji.name);
const { fields } = analyzeAttachments(person.attachment || []);
const { fields } = await analyzeAttachments(person.attachment || []);
const tags = extractApHashtags(person.tag)
.map((tag) => normalizeForSearch(tag))
@ -591,7 +591,7 @@ export async function updatePerson(
url: url,
fields,
description: person.summary
? htmlToMfm(truncate(person.summary, summaryLength), person.tag)
? await htmlToMfm(truncate(person.summary, summaryLength), person.tag)
: null,
birthday: bday ? bday[0] : null,
location: person["vcard:Address"] || null,
@ -676,7 +676,7 @@ function addService(target: { [x: string]: any }, source: IApPropertyValue) {
}
}
export function analyzeAttachments(
export async function analyzeAttachments(
attachments: IObject | IObject[] | undefined,
) {
const fields: {
@ -692,7 +692,7 @@ export function analyzeAttachments(
} else {
fields.push({
name: attachment.name,
value: fromHtml(attachment.value),
value: await fromHtml(attachment.value),
});
}
}

View file

@ -19,106 +19,106 @@ describe("toHtml", () => {
});
describe("fromHtml", () => {
it("p", () => {
assert.deepStrictEqual(fromHtml("<p>a</p><p>b</p>"), "a\n\nb");
it("p", async () => {
assert.deepStrictEqual(await fromHtml("<p>a</p><p>b</p>"), "a\n\nb");
});
it("block element", () => {
assert.deepStrictEqual(fromHtml("<div>a</div><div>b</div>"), "a\nb");
it("block element", async () => {
assert.deepStrictEqual(await fromHtml("<div>a</div><div>b</div>"), "a\nb");
});
it("inline element", () => {
assert.deepStrictEqual(fromHtml("<ul><li>a</li><li>b</li></ul>"), "a\nb");
it("inline element", async () => {
assert.deepStrictEqual(await fromHtml("<ul><li>a</li><li>b</li></ul>"), "a\nb");
});
it("block code", () => {
it("block code", async () => {
assert.deepStrictEqual(
fromHtml("<pre><code>a\nb</code></pre>"),
await fromHtml("<pre><code>a\nb</code></pre>"),
"```\na\nb\n```",
);
});
it("inline code", () => {
assert.deepStrictEqual(fromHtml("<code>a</code>"), "`a`");
it("inline code", async () => {
assert.deepStrictEqual(await fromHtml("<code>a</code>"), "`a`");
});
it("quote", () => {
it("quote", async () => {
assert.deepStrictEqual(
fromHtml("<blockquote>a\nb</blockquote>"),
await fromHtml("<blockquote>a\nb</blockquote>"),
"> a\n> b",
);
});
it("br", () => {
assert.deepStrictEqual(fromHtml("<p>abc<br><br/>d</p>"), "abc\n\nd");
it("br", async () => {
assert.deepStrictEqual(await fromHtml("<p>abc<br><br/>d</p>"), "abc\n\nd");
});
it("link with different text", () => {
it("link with different text", async () => {
assert.deepStrictEqual(
fromHtml('<p>a <a href="https://iceshrimp.dev/b">c</a> d</p>'),
await fromHtml('<p>a <a href="https://iceshrimp.dev/b">c</a> d</p>'),
"a [c](https://iceshrimp.dev/b) d",
);
});
it("link with different text, but not encoded", () => {
it("link with different text, but not encoded", async () => {
assert.deepStrictEqual(
fromHtml('<p>a <a href="https://iceshrimp.dev/ä">c</a> d</p>'),
await fromHtml('<p>a <a href="https://iceshrimp.dev/ä">c</a> d</p>'),
"a [c](<https://iceshrimp.dev/ä>) d",
);
});
it("link with same text", () => {
it("link with same text", async () => {
assert.deepStrictEqual(
fromHtml(
await fromHtml(
'<p>a <a href="https://joiniceshrimp.org/b">https://joiniceshrimp.org/b</a> d</p>',
),
"a https://joiniceshrimp.org/b d",
);
});
it("link with same text, but not encoded", () => {
it("link with same text, but not encoded", async () => {
assert.deepStrictEqual(
fromHtml(
await fromHtml(
'<p>a <a href="https://joiniceshrimp.org/ä">https://joiniceshrimp.org/ä</a> d</p>',
),
"a <https://joiniceshrimp.org/ä> d",
);
});
it("link with no url", () => {
it("link with no url", async () => {
assert.deepStrictEqual(
fromHtml('<p>a <a href="b">c</a> d</p>'),
await fromHtml('<p>a <a href="b">c</a> d</p>'),
"a [c](b) d",
);
});
it("link without href", () => {
assert.deepStrictEqual(fromHtml("<p>a <a>c</a> d</p>"), "a c d");
it("link without href", async () => {
assert.deepStrictEqual(await fromHtml("<p>a <a>c</a> d</p>"), "a c d");
});
it("link without text", () => {
it("link without text", async () => {
assert.deepStrictEqual(
fromHtml('<p>a <a href="https://joiniceshrimp.org/b"></a> d</p>'),
await fromHtml('<p>a <a href="https://joiniceshrimp.org/b"></a> d</p>'),
"a https://joiniceshrimp.org/b d",
);
});
it("link without both", () => {
assert.deepStrictEqual(fromHtml("<p>a <a></a> d</p>"), "a d");
it("link without both", async () => {
assert.deepStrictEqual(await fromHtml("<p>a <a></a> d</p>"), "a d");
});
it("mention", () => {
it("mention", async () => {
assert.deepStrictEqual(
fromHtml(
await fromHtml(
'<p>a <a href="https://joiniceshrimp.org/@user" class="u-url mention">@user</a> d</p>',
),
"a @user@joiniceshrimp.org d",
);
});
it("hashtag", () => {
it("hashtag", async () => {
assert.deepStrictEqual(
fromHtml('<p>a <a href="https://joiniceshrimp.org/tags/a">#a</a> d</p>', [
await fromHtml('<p>a <a href="https://joiniceshrimp.org/tags/a">#a</a> d</p>', [
"#a",
]),
"a #a d",