Files
vue-highlights/src/utils/extractUrls.js
Pedro G. Galaviz 4a99a58085 first commit
2019-11-28 20:55:32 -06:00

82 lines
2.3 KiB
JavaScript

// Extracts URLs from text
import { extractUrl, validAsciiDomain } from './regex'
import idna from './idna'
// Protocol passed to isValidUrl for its length check when a matched URL has no protocol of its own.
const DEFAULT_PROTOCOL = 'https://'
// Options applied when the caller passes none: URLs without a protocol are extracted too.
const DEFAULT_PROTOCOL_OPTIONS = { extractUrlsWithoutProtocol: true }
// Upper bound that isValidUrl enforces on protocol length plus (domain-adjusted) URL length.
const MAX_URL_LENGTH = 4096
// A protocol-less match is skipped when the text captured just before it ends with '-', '_', '.' or '/'.
const invalidUrlWithoutProtocolPrecedingChars = /[-_./]$/
/**
 * Decides whether a matched URL is acceptable.
 *
 * The domain is run through `idna.toAscii`; a falsy or empty result rejects
 * the URL. Otherwise the URL length is adjusted by the difference between the
 * converted domain and the original domain, and the URL is accepted only when
 * protocol length plus that adjusted length does not exceed MAX_URL_LENGTH.
 *
 * @param {string} url - The matched URL text.
 * @param {string} protocol - Protocol whose length is counted toward the limit.
 * @param {string} domain - The domain portion of `url`.
 * @returns {boolean} `true` when the URL passes both checks.
 */
function isValidUrl (url, protocol, domain) {
  const rawLength = url.length
  const asciiDomain = idna.toAscii(domain)
  if (asciiDomain && asciiDomain.length) {
    // Count the URL as if its domain were written in the converted form.
    const lengthDelta = asciiDomain.length - domain.length
    const adjustedLength = rawLength + lengthDelta
    return protocol.length + adjustedLength <= MAX_URL_LENGTH
  }
  return false
}
/**
 * Extracts every URL found in `text` together with its position.
 *
 * URLs that carry a protocol are reported as matched. URLs without one are
 * only reported when `options.extractUrlsWithoutProtocol` is truthy, the text
 * captured just before them does not end in '-', '_', '.' or '/', and their
 * domain contains at least one ASCII-only domain; each ASCII-only domain found
 * becomes its own entry, and the last one is extended with the path if any.
 *
 * @param {string} text - Text to scan. Falsy text yields an empty result.
 * @param {Object} [options] - Extraction options (defaults to
 *   DEFAULT_PROTOCOL_OPTIONS).
 * @param {boolean} [options.extractUrlsWithoutProtocol] - Whether URLs
 *   without a protocol should be extracted.
 * @returns {Array<{url: string, indices: number[]}>} One entry per URL;
 *   `indices` is `[start, end]` with `end` being the offset just past the URL.
 */
const extractUrlsWithIndices = function (text, options = DEFAULT_PROTOCOL_OPTIONS) {
  // Cheap pre-check: a protocol-less URL needs a '.', a URL with protocol needs a ':'.
  if (!text || (options.extractUrlsWithoutProtocol ? !text.match(/\./) : !text.match(/:/))) {
    return []
  }
  const urls = []
  // `extractUrl` is a shared, stateful regex. Always start from the beginning
  // of `text`, even if an earlier scan left `lastIndex` pointing elsewhere.
  extractUrl.lastIndex = 0
  let match
  while ((match = extractUrl.exec(text)) !== null) {
    // Read captures from the match itself instead of the deprecated RegExp.$N
    // statics; unmatched groups are normalised to '' as the statics did.
    const before = match[2] || ''
    const url = match[3] || ''
    const protocol = match[4] || ''
    const domain = match[5] || ''
    const path = match[7] || ''
    const endPosition = extractUrl.lastIndex
    const startPosition = endPosition - url.length
    if (!isValidUrl(url, protocol || DEFAULT_PROTOCOL, domain)) {
      continue
    }
    if (protocol) {
      urls.push({
        url,
        indices: [startPosition, endPosition]
      })
      continue
    }
    // No protocol: extract ASCII-only domains.
    if (!options.extractUrlsWithoutProtocol || before.match(invalidUrlWithoutProtocolPrecedingChars)) {
      continue
    }
    let lastUrl = null
    let asciiEndPosition = 0
    // `replace` is used only to visit each ASCII-domain match; its result is discarded.
    domain.replace(validAsciiDomain, function (asciiDomain) {
      const asciiStartPosition = domain.indexOf(asciiDomain, asciiEndPosition)
      asciiEndPosition = asciiStartPosition + asciiDomain.length
      lastUrl = {
        url: asciiDomain,
        indices: [startPosition + asciiStartPosition, startPosition + asciiEndPosition]
      }
      urls.push(lastUrl)
    })
    // No ASCII-only domain found. Skip the entire URL.
    if (lastUrl === null) {
      continue
    }
    // lastUrl only contains the domain. Add path and query if they exist.
    if (path) {
      lastUrl.url = url.replace(domain, lastUrl.url)
      lastUrl.indices[1] = endPosition
    }
  }
  return urls
}
export default extractUrlsWithIndices