Javascript truncate HTML text

Does JavaScript have a way of truncating HTML text without all the headaches of matching tags etc etc?

Thank you.

Answers:

Answer

There's nothing built-in javascript. There's a jQuery plugin that you might take a look at.

Answer

I had the same problem, and wound up writing the following to deal with it. It truncates HTML to a give length, cleans up any start / end tags that might have gotten snipped off at the end, and then closes any tags left unclosed:

function truncateHTML(text, length) {
    var truncated = text.substring(0, length);
    // Remove line breaks and surrounding whitespace
    truncated = truncated.replace(/(\r\n|\n|\r)/gm,"").trim();
    // If the text ends with an incomplete start tag, trim it off
    truncated = truncated.replace(/<(\w*)(?:(?:\s\w+(?:={0,1}(["']{0,1})\w*\2{0,1})))*$/g, '');
    // If the text ends with a truncated end tag, fix it.
    var truncatedEndTagExpr = /<\/((?:\w*))$/g;
    var truncatedEndTagMatch = truncatedEndTagExpr.exec(truncated);
    if (truncatedEndTagMatch != null) {
        var truncatedEndTag = truncatedEndTagMatch[1];
        // Check to see if there's an identifiable tag in the end tag
        if (truncatedEndTag.length > 0) {
            // If so, find the start tag, and close it
            var startTagExpr = new RegExp(
                "<(" + truncatedEndTag + "\\w?)(?:(?:\\s\\w+(?:=([\"\'])\\w*\\2)))*>");
            var testString = truncated;
            var startTagMatch = startTagExpr.exec(testString);

            var startTag = null;
            while (startTagMatch != null) {
                startTag = startTagMatch[1];
                testString = testString.replace(startTagExpr, '');
                startTagMatch = startTagExpr.exec(testString);
            }
            if (startTag != null) {
                truncated = truncated.replace(truncatedEndTagExpr, '</' + startTag + '>');
            }
        } else {
            // Otherwise, cull off the broken end tag
            truncated = truncated.replace(truncatedEndTagExpr, '');
        }
    }
    // Now the tricky part. Reverse the text, and look for opening tags. For each opening tag,
    //  check to see that he closing tag before it is for that tag. If not, append a closing tag.
    var testString = reverseHtml(truncated);
    var reverseTagOpenExpr = /<(?:(["'])\w*\1=\w+ )*(\w*)>/;
    var tagMatch = reverseTagOpenExpr.exec(testString);
    while (tagMatch != null) {
        var tag = tagMatch[0];
        var tagName = tagMatch[2];
        var startPos = tagMatch.index;
        var endPos = startPos + tag.length;
        var fragment = testString.substring(0, endPos);
        // Test to see if an end tag is found in the fragment. If not, append one to the end
        //  of the truncated HTML, thus closing the last unclosed tag
        if (!new RegExp("<" + tagName + "\/>").test(fragment)) {
            truncated += '</' + reverseHtml(tagName) + '>';
        }
        // Get rid of the already tested fragment
        testString = testString.replace(fragment, '');
        // Get another tag to test
        tagMatch = reverseTagOpenExpr.exec(testString);
    }
    return truncated;
}

function reverseHtml(str) {
    var ph = String.fromCharCode(206);
    var result = str.split('').reverse().join('');
    while (result.indexOf('<') > -1) {
        result = result.replace('<',ph);
    }
    while (result.indexOf('>') > -1) {
        result = result.replace('>', '<');
    }
    while (result.indexOf(ph) > -1) {
        result = result.replace(ph, '>');
    }
    return result;
}
Answer

I know this question is old, but I recently had the same problem. I wrote the following library, which truncates valid HTML safely: https://github.com/arendjr/text-clipper

Answer

There's a mootools plugin which does exactly what you need: mooReadAll at mootools forge

Answer

If you want a light-weight solution in vanilla JS, this should do the trick, although it'll leave empty elements around, so it depends on if you care about those. Also note that it mutates the nodes in-place.

function truncateNode(node, limit) {
  if (node.nodeType === Node.TEXT_NODE) {
    node.textContent = node.textContent.substring(0, limit);
    return limit - node.textContent.length;
  }

  node.childNodes.forEach((child) => {
    limit = truncateNode(child, limit);
  });

  return limit;
}
const span = document.createElement('span');
span.innerHTML = '<b>foo</b><i>bar</i><u>baz</u>';
truncateNode(span, 5);
expect(span.outerHTML).toEqual('<span><b>foo</b><i>ba</i><u></u></span>');
Answer

I just recently finished a jQuery function to do this using the width & height of the container. Test it out and see if it works for you. I'm not yet sure of all the compatibility issues, bugs, or limitations but I've tested it in FF, Chrome, and IE7.

Answer

None of the above solutions corresponded perfectly to my use case, so I created myself a small vanilla javascript function. It leaves empty elements but it could be corrected easily.

const truncateWithHTML = (string, length) => {
    // string = "<span class='className'>My long string that</span> I want shorter<span> but just a little bit</span>"

    const noHTML = string.replace(/<[^>]*>/g, '');

    // if the string does not need to be truncated
    if (noHTML.length <= max){
        return string;
    }

    // if the string does not contains tags
    if (noHTML.length === string.length){
        // add <span title=""> to allow complete string to appear on hover
        return `<span title="${string}">${string.substring(0, max).trim()}…</span>`;
    }

    const substrings =  string.split(/(<[^>]*>)/g).filter(Boolean);
    // substrings = ["<span class='className'>","My long string that","</span>"," I want shorter","<span>"," but just a little bit","</span>"]

    let count = 0;
    let truncated = [];
    for (let i = 0; i < substrings.length; i++) {
        let substr = substrings[i];
        // if the substring isn't an HTML tag
        if (! substr.startsWith("<")){
            if (count > length){
                continue;
            } else if (substr.length > (length-count-1)){
                truncated.push(substr.substring(0, (length-count) - 1) + '…');
            } else {
                truncated.push(substr);
            }
            count += substr.length;
        } else {
            truncated.push(substr);
        }
    }

    return `<span title="${noHTML}">${truncated.join("")}…</span>`;
}

Examples:

string = "<span class='className'>My long string that</span> I want shorter<span> but just a little bit</span>";

truncateWithHTML(string,10); // "<span title='My long string that I want shorter but just a little bit'><span class='className'>My long s…</span><span></span></span>"
truncateWithHTML(string,22); // "<span title='My long string that I want shorter but just a little bit'><span class='className'>My long string that</span> I…<span></span></span>"
Answer

That's quite challenging.

If you don't have any HTML markup, the following might be useful.

Tags

Recent Questions

Top Questions

Home Tags Terms of Service Privacy Policy DMCA Contact Us

©2020 All rights reserved.