ฟังก์ชั่นตัดข้อความ HTML ตามความยาวที่กำหนด (UTF-8)

ฟังก์ชั่นตัดสตริง สามารถตัดข้อความใน tag HTML ได้โดยไม่นับความยาวของข้อความส่วนที่เป็น tag และสามารถให้ผลลัพธ์ได้ทั้งข้อความธรรมดาและข้อความ HTML

ฟังก์ชั่นนี้ใช้งานกับ UTF-8 เท่านั้น
/**
 * Truncate a UTF-8 string to a maximum visible length, optionally HTML-aware.
 *
 * In HTML mode tags are copied to the output without being counted, every
 * HTML entity counts as one character, and any tag still open at the cut
 * point is closed after the ending has been appended.
 *
 * Relies on strlen_utf8() and substr_utf8(), which are not defined in this
 * block. They are assumed to be character-based (not byte-based) UTF-8
 * counterparts of strlen() / substr() -- confirm against their definitions.
 *
 * @param string  $text         Input text (HTML when $considerHtml is true).
 * @param integer $length       Maximum number of visible characters in the
 *                              result, INCLUDING the length of $ending.
 * @param string  $ending       Text appended to the truncated result.
 * @param boolean $exact        true  = cut exactly at the character limit;
 *                              false = back up to the last space so that a
 *                              word is not split in the middle.
 * @param boolean $considerHtml true  = treat $text as HTML (see above);
 *                              false = treat $text as plain text.
 *
 * @return string The original $text when it already fits, otherwise the
 *                truncated text followed by $ending (and closing tags).
 */
function truncate( $text , $length = 100 , $ending = '...' , $exact = true , $considerHtml = true )
{
    // Stack of open tag names (lower-case, innermost first). Only used in HTML mode.
    $open_tags = array();

    if ( $considerHtml )
    {
        // The visible text (all tags stripped) already fits: return the input untouched.
        // The "s" flag keeps this check consistent with the tokenizer below for
        // tags that span several lines.
        if ( strlen_utf8( preg_replace( '/<.*?>/su' , '' , $text ) ) <= $length )
        {
            return $text;
        }

        // Split the input into pairs of (optional tag, text run that follows it).
        preg_match_all( '/(<.+?>)?([^<>]*)/su' , $text , $lines , PREG_SET_ORDER );

        // The ending counts towards the limit, so reserve room for it up front.
        $total_length   = strlen_utf8( $ending );
        $truncate       = '';
        $entity_pattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/iu';

        foreach ( $lines as $line_matchings )
        {
            // Tags go to the output uncounted; remember which ones are still open.
            if ( !empty( $line_matchings[1] ) )
            {
                $open_tags = truncate_track_tag( $line_matchings[1] , $open_tags );
                $truncate .= $line_matchings[1];
            }

            // Visible length of the text run; each entity is counted as one character.
            $content_length = strlen_utf8( preg_replace( $entity_pattern , ' ' , $line_matchings[2] ) );

            if ( $total_length + $content_length > $length )
            {
                // Number of visible characters that still fit.
                $left = $length - $total_length;
                $entities_length = 0;

                if ( preg_match_all( $entity_pattern , $line_matchings[2] , $entities , PREG_OFFSET_CAPTURE ) )
                {
                    // Add the raw length of every entity that starts inside the
                    // remaining budget, so the cut never lands inside an entity.
                    foreach ( $entities[0] as $entity )
                    {
                        // PREG_OFFSET_CAPTURE reports BYTE offsets; convert to a
                        // character offset so multibyte text before the entity
                        // does not push it out of range.
                        $entity_pos = strlen_utf8( substr( $line_matchings[2] , 0 , $entity[1] ) );

                        if ( $entity_pos + 1 - $entities_length <= $left )
                        {
                            $left--;
                            $entities_length += strlen_utf8( $entity[0] );
                        }
                        else
                        {
                            // This entity and all later ones are past the limit.
                            break;
                        }
                    }
                }

                $truncate .= substr_utf8( $line_matchings[2] , 0 , $left + $entities_length );

                // Limit reached.
                break;
            }

            $truncate .= $line_matchings[2];
            $total_length += $content_length;

            if ( $total_length >= $length )
            {
                break;
            }
        }
    }
    else
    {
        if ( strlen_utf8( $text ) <= $length )
        {
            return $text;
        }

        $truncate = substr_utf8( $text , 0 , $length - strlen_utf8( $ending ) );
    }

    // Word-boundary mode: back up to the last space so no word is split.
    if ( !$exact )
    {
        $spacepos = false;

        if ( $considerHtml )
        {
            // Only spaces OUTSIDE tags qualify; cutting at the space inside
            // e.g. <a href="..."> would leave broken markup. Tags are consumed
            // by the first alternative, so a bare ' ' match is always text.
            if ( preg_match_all( '/<.+?>| /su' , $truncate , $tokens , PREG_OFFSET_CAPTURE ) )
            {
                foreach ( $tokens[0] as $token )
                {
                    if ( $token[0] === ' ' )
                    {
                        $spacepos = $token[1];
                    }
                }
            }
        }
        else
        {
            $spacepos = strrpos( $truncate , ' ' );
        }

        // strrpos() yields false (not null) when there is no space; in that
        // case the text is left cut at the character limit.
        if ( $spacepos !== false )
        {
            // $spacepos is a byte offset, so cut byte-wise. This is safe in
            // UTF-8 because byte 0x20 never occurs inside a multibyte sequence.
            $truncate = substr( $truncate , 0 , $spacepos );

            if ( $considerHtml )
            {
                // The cut may have dropped opening or closing tags; rebuild the
                // open-tag stack from what was actually kept.
                $open_tags = array();

                if ( preg_match_all( '/<.+?>/su' , $truncate , $kept_tags ) )
                {
                    foreach ( $kept_tags[0] as $kept_tag )
                    {
                        $open_tags = truncate_track_tag( $kept_tag , $open_tags );
                    }
                }
            }
        }
    }

    // Append the ending, then close whatever is still open (innermost first).
    $truncate .= $ending;

    if ( $considerHtml )
    {
        foreach ( $open_tags as $tag )
        {
            $truncate .= '</' . $tag . '>';
        }
    }

    return $truncate;
}

/**
 * Private helper of truncate(): update the open-tag stack for one HTML tag.
 *
 * @param string $tag       A single tag including its angle brackets.
 * @param array  $open_tags Open tag names, lower-case, innermost first.
 *
 * @return array The updated stack.
 */
function truncate_track_tag( $tag , $open_tags )
{
    // Self-closing syntax (<x/>) or a void element: nothing to open or close.
    if ( preg_match( '/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|embed|frame|isindex|keygen|link|meta|param|source|track|wbr)(\s.+?)?)>$/isu' , $tag ) )
    {
        return $open_tags;
    }

    // Closing tag: drop the most recently opened tag of that name. Names are
    // stored lower-case, so the lookup must be lower-cased as well.
    if ( preg_match( '/^<\s*\/([^\s]+?)\s*>$/us' , $tag , $tag_matchings ) )
    {
        $pos = array_search( strtolower( $tag_matchings[1] ) , $open_tags , true );

        if ( $pos !== false )
        {
            unset( $open_tags[$pos] );
        }

        return $open_tags;
    }

    // Opening tag (comments and <!DOCTYPE> are excluded by the "!" in the class).
    if ( preg_match( '/^<\s*([^\s>!]+).*?>$/su' , $tag , $tag_matchings ) )
    {
        array_unshift( $open_tags , strtolower( $tag_matchings[1] ) );
    }

    return $open_tags;
}
ตัวอย่าง :
<?php
// Usage example: shorten an HTML snippet with a length limit of 10 (default ending, exact cut, HTML mode).
print truncate( 'ทดสอบ <a href="https://www.goragod.com">goragod.com</a> ทดสอบ' , 10 );
?>