From a1bbf7d9ca9a6a8536022742dff9e1565a03eeb1 Mon Sep 17 00:00:00 2001 From: MeiMei <30769358+mei23@users.noreply.github.com> Date: Sat, 6 Feb 2021 21:44:46 +0900 Subject: [PATCH] Fix HTML to MFM (#7150) * Fix type * Fix HTML to MFM --- src/mfm/from-html.ts | 62 ++++++++++++++++++++++++++++++-------------- test/mfm.ts | 12 +++++++++ 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/src/mfm/from-html.ts b/src/mfm/from-html.ts index 4c27c2cbb..0b4f9b894 100644 --- a/src/mfm/from-html.ts +++ b/src/mfm/from-html.ts @@ -1,8 +1,10 @@ -import { parseFragment, DefaultTreeDocumentFragment } from 'parse5'; -import { urlRegexFull } from './prelude'; +import * as parse5 from 'parse5'; +import treeAdapter = require('parse5/lib/tree-adapters/default'); +import { URL } from 'url'; +import { urlRegex, urlRegexFull } from './prelude'; export function fromHtml(html: string, hashtagNames?: string[]): string { - const dom = parseFragment(html) as DefaultTreeDocumentFragment; + const dom = parse5.parseFragment(html); let text = ''; @@ -12,30 +14,35 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { return text.trim(); - function getText(node: any): string { - if (node.nodeName === '#text') return node.value; + function getText(node: parse5.Node): string { + if (treeAdapter.isTextNode(node)) return node.value; + if (!treeAdapter.isElementNode(node)) return ''; if (node.childNodes) { - return node.childNodes.map((n: any) => getText(n)).join(''); + return node.childNodes.map(n => getText(n)).join(''); } return ''; } - function analyze(node: any) { - switch (node.nodeName) { - case '#text': - text += node.value; - break; + function analyze(node: parse5.Node) { + if (treeAdapter.isTextNode(node)) { + text += node.value; + return; + } + // Skip comment or document type node + if (!treeAdapter.isElementNode(node)) return; + + switch (node.nodeName) { case 'br': text += '\n'; break; case 'a': const txt = getText(node); - const rel = node.attrs.find((x: any) => x.name === 'rel'); - const href = node.attrs.find((x: any) => x.name === 'href'); + const rel = node.attrs.find(x => x.name === 'rel'); + const href = node.attrs.find(x => x.name === 'href'); // ハッシュタグ if (hashtagNames && href && hashtagNames.map(x => x.toLowerCase()).includes(txt.toLowerCase())) { @@ -44,7 +51,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { } else if (txt.startsWith('@') && !(rel && rel.value.match(/^me /))) { const part = txt.split('@'); - if (part.length === 2) { + if (part.length === 2 && href) { //#region ホスト名部分が省略されているので復元する const acct = `${txt}@${(new URL(href.value)).hostname}`; text += acct; @@ -54,11 +61,28 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { } // その他 } else { - text += !href ? txt - : txt === href.value - ? txt.match(urlRegexFull) ? txt - : `<${txt}>` - : `[${txt}](${href.value})`; + const generateLink = () => { + if (!href && !txt) { + return ''; + } + if (!href) { + return txt; + } + if (!txt || txt === href.value) { // #6383: Missing text node + if (href.value.match(urlRegexFull)) { + return href.value; + } else { + return `<${href.value}>`; + } + } + if (href.value.match(urlRegex) && !href.value.match(urlRegexFull)) { + return `[${txt}](<${href.value}>)`; // #6846 + } else { + return `[${txt}](${href.value})`; + } + }; + + text += generateLink(); } break; diff --git a/test/mfm.ts b/test/mfm.ts index a32457e89..0a120f96e 100644 --- a/test/mfm.ts +++ b/test/mfm.ts @@ -1167,6 +1167,10 @@ describe('fromHtml', () => { assert.deepStrictEqual(fromHtml('
a c d
'), 'a [c](https://example.com/b) d'); }); + it('link with different text, but not encoded', () => { + assert.deepStrictEqual(fromHtml('a c d
'), 'a [c](a c d
'), 'a c d'); }); + it('link without text', () => { + assert.deepStrictEqual(fromHtml(''), 'a https://example.com/b d'); + }); + + it('link without both', () => { + assert.deepStrictEqual(fromHtml(''), 'a d'); + }); + it('mention', () => { assert.deepStrictEqual(fromHtml('a @user d
'), 'a @user@example.com d'); });