Fix HTML to MFM (#7150)

* Fix type

* Fix HTML to MFM
This commit is contained in:
MeiMei 2021-02-06 21:44:46 +09:00 committed by GitHub
parent b7ed6a6c56
commit a1bbf7d9ca
2 changed files with 55 additions and 19 deletions

View file

@ -1,8 +1,10 @@
import { parseFragment, DefaultTreeDocumentFragment } from 'parse5'; import * as parse5 from 'parse5';
import { urlRegexFull } from './prelude'; import treeAdapter = require('parse5/lib/tree-adapters/default');
import { URL } from 'url';
import { urlRegex, urlRegexFull } from './prelude';
export function fromHtml(html: string, hashtagNames?: string[]): string { export function fromHtml(html: string, hashtagNames?: string[]): string {
const dom = parseFragment(html) as DefaultTreeDocumentFragment; const dom = parse5.parseFragment(html);
let text = ''; let text = '';
@ -12,30 +14,35 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
return text.trim(); return text.trim();
function getText(node: any): string { function getText(node: parse5.Node): string {
if (node.nodeName === '#text') return node.value; if (treeAdapter.isTextNode(node)) return node.value;
if (!treeAdapter.isElementNode(node)) return '';
if (node.childNodes) { if (node.childNodes) {
return node.childNodes.map((n: any) => getText(n)).join(''); return node.childNodes.map(n => getText(n)).join('');
} }
return ''; return '';
} }
function analyze(node: any) { function analyze(node: parse5.Node) {
switch (node.nodeName) { if (treeAdapter.isTextNode(node)) {
case '#text': text += node.value;
text += node.value; return;
break; }
// Skip comment or document type node
if (!treeAdapter.isElementNode(node)) return;
switch (node.nodeName) {
case 'br': case 'br':
text += '\n'; text += '\n';
break; break;
case 'a': case 'a':
const txt = getText(node); const txt = getText(node);
const rel = node.attrs.find((x: any) => x.name === 'rel'); const rel = node.attrs.find(x => x.name === 'rel');
const href = node.attrs.find((x: any) => x.name === 'href'); const href = node.attrs.find(x => x.name === 'href');
// ハッシュタグ // ハッシュタグ
if (hashtagNames && href && hashtagNames.map(x => x.toLowerCase()).includes(txt.toLowerCase())) { if (hashtagNames && href && hashtagNames.map(x => x.toLowerCase()).includes(txt.toLowerCase())) {
@ -44,7 +51,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
} else if (txt.startsWith('@') && !(rel && rel.value.match(/^me /))) { } else if (txt.startsWith('@') && !(rel && rel.value.match(/^me /))) {
const part = txt.split('@'); const part = txt.split('@');
if (part.length === 2) { if (part.length === 2 && href) {
//#region ホスト名部分が省略されているので復元する //#region ホスト名部分が省略されているので復元する
const acct = `${txt}@${(new URL(href.value)).hostname}`; const acct = `${txt}@${(new URL(href.value)).hostname}`;
text += acct; text += acct;
@ -54,11 +61,28 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
} }
// その他 // その他
} else { } else {
text += !href ? txt const generateLink = () => {
: txt === href.value if (!href && !txt) {
? txt.match(urlRegexFull) ? txt return '';
: `<${txt}>` }
: `[${txt}](${href.value})`; if (!href) {
return txt;
}
if (!txt || txt === href.value) { // #6383: Missing text node
if (href.value.match(urlRegexFull)) {
return href.value;
} else {
return `<${href.value}>`;
}
}
if (href.value.match(urlRegex) && !href.value.match(urlRegexFull)) {
return `[${txt}](<${href.value}>)`; // #6846
} else {
return `[${txt}](${href.value})`;
}
};
text += generateLink();
} }
break; break;

View file

@ -1167,6 +1167,10 @@ describe('fromHtml', () => {
assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b">c</a> d</p>'), 'a [c](https://example.com/b) d'); assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b">c</a> d</p>'), 'a [c](https://example.com/b) d');
}); });
it('link with different text, but not encoded', () => {
assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/ä">c</a> d</p>'), 'a [c](<https://example.com/ä>) d');
});
it('link with same text', () => { it('link with same text', () => {
assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b">https://example.com/b</a> d</p>'), 'a https://example.com/b d'); assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b">https://example.com/b</a> d</p>'), 'a https://example.com/b d');
}); });
@ -1183,6 +1187,14 @@ describe('fromHtml', () => {
assert.deepStrictEqual(fromHtml('<p>a <a>c</a> d</p>'), 'a c d'); assert.deepStrictEqual(fromHtml('<p>a <a>c</a> d</p>'), 'a c d');
}); });
it('link without text', () => {
assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b"></a> d</p>'), 'a https://example.com/b d');
});
it('link without both', () => {
assert.deepStrictEqual(fromHtml('<p>a <a></a> d</p>'), 'a d');
});
it('mention', () => { it('mention', () => {
assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/@user" class="u-url mention">@user</a> d</p>'), 'a @user@example.com d'); assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/@user" class="u-url mention">@user</a> d</p>'), 'a @user@example.com d');
}); });