wcurl (10622B)
#!/bin/sh

# wcurl - a simple wrapper around curl to easily download files.
#
# Requires curl >= 7.46.0 (2015)
#
# Copyright (C) Samuel Henrique <samueloph@debian.org>, Sergio Durigan
# Junior <sergiodj@debian.org> and many contributors, see the AUTHORS
# file.
#
# Permission to use, copy, modify, and distribute this software for any purpose
# with or without fee is hereby granted, provided that the above copyright
# notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of a copyright holder shall not be
# used in advertising or otherwise to promote the sale, use or other dealings in
# this Software without prior written authorization of the copyright holder.
#
# SPDX-License-Identifier: curl

# Stop on errors and on usage of unset variables.
set -eu

VERSION="2025.05.26"

# Name this script was invoked as; interpolated into the usage text.
PROGRAM_NAME="$(basename "$0")"
readonly PROGRAM_NAME

# Display the version.
# Outputs: the value of VERSION followed by a newline, on stdout.
print_version()
{
    cat << _EOF_
${VERSION}
_EOF_
}

# Display the program usage.
# Outputs: the help text on stdout, with PROGRAM_NAME expanded by the here-document.
usage()
{
    cat << _EOF_
${PROGRAM_NAME} -- a simple wrapper around curl to easily download files.

Usage: ${PROGRAM_NAME} <URL>...
       ${PROGRAM_NAME} [--curl-options <CURL_OPTIONS>]... [--no-decode-filename] [-o|-O|--output <PATH>] [--dry-run] [--] <URL>...
       ${PROGRAM_NAME} [--curl-options=<CURL_OPTIONS>]... [--no-decode-filename] [--output=<PATH>] [--dry-run] [--] <URL>...
       ${PROGRAM_NAME} -h|--help
       ${PROGRAM_NAME} -V|--version

Options:

  --curl-options <CURL_OPTIONS>: Specify extra options to be passed when invoking curl. May be
                                 specified more than once.

  -o, -O, --output <PATH>: Use the provided output path instead of getting it from the URL. If
                           multiple URLs are provided, resulting files share the same name with a
                           number appended to the end (curl >= 7.83.0). If this option is provided
                           multiple times, only the last value is considered.

  --no-decode-filename: Don't percent-decode the output filename, even if the percent-encoding in
                        the URL was done by wcurl, e.g.: The URL contained whitespaces.

  --dry-run: Don't actually execute curl, just print what would be invoked.

  -V, --version: Print version information.

  -h, --help: Print this usage message.

  <CURL_OPTIONS>: Any option supported by curl can be set here. This is not used by wcurl; it is
                  instead forwarded to the curl invocation.

  <URL>: URL to be downloaded. Anything that is not a parameter is considered
         an URL. Whitespaces are percent-encoded and the URL is passed to curl, which
         then performs the parsing. May be specified more than once.
_EOF_
}

# Display an error message and bail out.
# Arguments: the words of the message; they are joined with spaces.
# Outputs:   the message followed by a newline, on stderr.
# Exits:     always terminates the shell with status 1.
error()
{
    # Duplicate onto file descriptor 2 instead of opening /dev/stderr, so the
    # message does not depend on that device node existing or being re-openable.
    printf "%s\n" "$*" >&2
    exit 1
}

# Extra curl options provided by the user.
# This is set per-URL for every URL provided.
# Some options are global, but we are erring on the side of needlessly setting
# them multiple times instead of causing issues with parameters that need to
# be set per-URL.
CURL_OPTIONS=""

# The URLs to be downloaded.
URLS=""

# Variable used to be set to the percent-decoded filename parsed from the URL, unless
# --output or --no-decode-filename are used.
OUTPUT_PATH=""
# Set to "true" once the user provides an output path (-o, -O or --output).
HAS_USER_SET_OUTPUT="false"

# The parameters that are passed per-URL to curl.
readonly PER_URL_PARAMETERS="\
    --fail \
    --globoff \
    --location \
    --proto-default https \
    --remote-time \
    --retry 5 "

# Whether to invoke curl or not.
DRY_RUN="false"

# Sanitize parameters.
# Bails out through error() when no URL was collected, then freezes the
# variables filled in by argument parsing so later code cannot modify them.
sanitize()
{
    if [ -z "${URLS}" ]; then
        error "You must provide at least one URL to download."
    fi

    readonly CURL_OPTIONS URLS DRY_RUN HAS_USER_SET_OUTPUT
}

# Indicate via exit code whether the string given in the first parameter
# consists solely of characters from the string given in the second parameter.
# In other words, it returns 0 if the first parameter only contains characters
# from the second parameter, e.g.: Are $1 characters a subset of $2 characters?
# An empty first parameter returns 1.
is_subset_of()
{
    case "${1}" in
        # $2 is deliberately unquoted: it forms the bracket expression's character set.
        *[!${2}]*|'') return 1;;
    esac
}

# Print the given string percent-decoded.
# Arguments: $1 - the string to decode.
# Globals:   DECODE_FILENAME (read) - nothing is decoded unless it is "true".
# Outputs:   the resulting string on stdout, without a trailing newline.
percent_decode()
{
    # Encodings of control characters (00-1F) are passed through without decoding.
    # Iterate on the input character-by-character, decoding it.
    printf "%s\n" "${1}" | fold -w1 | while IFS= read -r decode_out; do
        # If character is a "%", read the next character as decode_hex1.
        if [ "${decode_out}" = % ] && IFS= read -r decode_hex1; then
            decode_out="${decode_out}${decode_hex1}"
            # If there's one more character, read it as decode_hex2.
            if IFS= read -r decode_hex2; then
                decode_out="${decode_out}${decode_hex2}"
                # Skip decoding if this is a control character (00-1F).
                # Skip decoding if DECODE_FILENAME is not "true".
                if is_subset_of "${decode_hex1}" "23456789abcdefABCDEF" && \
                    is_subset_of "${decode_hex2}" "0123456789abcdefABCDEF" && \
                    [ "${DECODE_FILENAME}" = "true" ]; then
                    # Use printf to decode it into octal and then decode it to the final format.
                    decode_out="$(printf "%b" "\\$(printf %o "0x${decode_hex1}${decode_hex2}")")"
                fi
            fi
        fi
        printf %s "${decode_out}"
    done
}

# Print the percent-decoded filename portion of the given URL.
# Arguments: $1 - the URL.
# Outputs:   the text after the last '/' of the host-and-path part, passed
#            through percent_decode; nothing when there is no '/'.
get_url_filename()
{
    # Remove protocol and query string if present.
    hostname_and_path="$(printf %s "${1}" | sed -e 's,^[^/]*//,,' -e 's,?.*$,,')"
    # If what remains contains a slash, there's a path; return it percent-decoded.
    case "${hostname_and_path}" in
        # sed to remove everything preceding the last '/', e.g.: "example/something" becomes "something"
        */*) percent_decode "$(printf %s "${hostname_and_path}" | sed -e 's,^.*/,,')";;
    esac
    # No slash means there was just a hostname and no path; return empty string.
}

# Execute curl with the list of URLs provided by the user.
# Globals: URLS, CURL_OPTIONS, PER_URL_PARAMETERS, HAS_USER_SET_OUTPUT, DRY_RUN
#          (read); OUTPUT_PATH (read, and written when the user set no output).
# When DRY_RUN is "false" the shell is replaced by curl (exec); otherwise the
# assembled command is printed one argument per line.
exec_curl()
{
    CMD="curl "

    # Store version to check if it supports --no-clobber and --parallel.
    curl_version=$($CMD --version | cut -f2 -d' ' | head -n1)
    curl_version_major=$(echo "$curl_version" | cut -f1 -d.)
    curl_version_minor=$(echo "$curl_version" | cut -f2 -d.)

    CURL_HAS_NO_CLOBBER=""
    CURL_HAS_PARALLEL=""
    # --no-clobber is only supported since 7.83.0.
    # --parallel is only supported since 7.66.0.
    if [ "${curl_version_major}" -ge 8 ]; then
        CURL_HAS_NO_CLOBBER="--no-clobber"
        CURL_HAS_PARALLEL="--parallel"
    elif [ "${curl_version_major}" -eq 7 ]; then
        if [ "${curl_version_minor}" -ge 83 ]; then
            CURL_HAS_NO_CLOBBER="--no-clobber"
        fi
        if [ "${curl_version_minor}" -ge 66 ]; then
            CURL_HAS_PARALLEL="--parallel"
        fi
    fi

    # Detecting whether we need --parallel.  It's easier to rely on
    # the shell's argument parsing.
    # shellcheck disable=SC2086
    set -- $URLS

    if [ "$#" -gt 1 ]; then
        CURL_PARALLEL="$CURL_HAS_PARALLEL"
    else
        CURL_PARALLEL=""
    fi

    # Start assembling the command.
    #
    # We use 'set --' here (again) because (a) we don't have arrays on
    # POSIX shell, and (b) we need better control over the way we
    # split arguments.
    #
    # shellcheck disable=SC2086
    set -- ${CMD} ${CURL_PARALLEL}

    NEXT_PARAMETER=""
    for url in ${URLS}; do
        # If the user did not provide an output path, define one.
        if [ "${HAS_USER_SET_OUTPUT}" = "false" ]; then
            OUTPUT_PATH="$(get_url_filename "${url}")"
            # If we could not get a path from the URL, use the default: index.html.
            [ -z "${OUTPUT_PATH}" ] && OUTPUT_PATH=index.html
        fi
        # shellcheck disable=SC2086
        set -- "$@" ${NEXT_PARAMETER} ${PER_URL_PARAMETERS} ${CURL_HAS_NO_CLOBBER} ${CURL_OPTIONS} --output "${OUTPUT_PATH}" "${url}"
        # Every URL after the first one is separated from the previous by --next.
        NEXT_PARAMETER="--next"
    done

    if [ "${DRY_RUN}" = "false" ]; then
        exec "$@"
    else
        printf "%s\n" "$@"
    fi
}

# Default to decoding the output filename
DECODE_FILENAME="true"

# Use "${1-}" in order to avoid errors because of 'set -u'.
while [ -n "${1-}" ]; do
    case "${1}" in
        --curl-options=*)
            opt=$(printf "%s\n" "${1}" | sed 's/^--curl-options=//')
            CURL_OPTIONS="${CURL_OPTIONS} ${opt}"
            ;;

        --curl-options)
            # Without a following value, "${1}" after the shift would be unset
            # and 'set -u' would abort with an unhelpful shell diagnostic.
            [ "$#" -ge 2 ] || error "Option '${1}' requires an argument."
            shift
            CURL_OPTIONS="${CURL_OPTIONS} ${1}"
            ;;

        --dry-run)
            DRY_RUN="true"
            ;;

        --output=*)
            opt=$(printf "%s\n" "${1}" | sed 's/^--output=//')
            HAS_USER_SET_OUTPUT="true"
            OUTPUT_PATH="${opt}"
            ;;

        -o|-O|--output)
            # Same guard as for --curl-options: a value must follow the option.
            [ "$#" -ge 2 ] || error "Option '${1}' requires an argument."
            shift
            HAS_USER_SET_OUTPUT="true"
            OUTPUT_PATH="${1}"
            ;;

        -o*|-O*)
            # Short option with the value attached, e.g.: -ofile.
            opt=$(printf "%s\n" "${1}" | sed 's/^-[oO]//')
            HAS_USER_SET_OUTPUT="true"
            OUTPUT_PATH="${opt}"
            ;;

        --no-decode-filename)
            DECODE_FILENAME="false"
            ;;

        -h|--help)
            usage
            exit 0
            ;;

        -V|--version)
            print_version
            exit 0
            ;;

        --)
            # This is the start of the list of URLs.
            shift
            for url in "$@"; do
                # Encode whitespaces into %20, since wget supports those URLs.
                newurl=$(printf "%s\n" "${url}" | sed 's/ /%20/g')
                URLS="${URLS} ${newurl}"
            done
            break
            ;;

        -*)
            error "Unknown option: '$1'."
            ;;

        *)
            # This must be a URL.
            # Encode whitespaces into %20, since wget supports those URLs.
            newurl=$(printf "%s\n" "${1}" | sed 's/ /%20/g')
            URLS="${URLS} ${newurl}"
            ;;
    esac
    shift
done

sanitize
exec_curl