webget.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. #include <iostream>
  2. #include <unistd.h>
  3. #include <sys/stat.h>
  4. #include <mutex>
  5. #include <curl/curl.h>
  6. #include "webget.h"
  7. #include "version.h"
  8. #include "misc.h"
  9. #include "logger.h"
  10. #ifdef _WIN32
  11. #ifndef _stat
  12. #define _stat stat
  13. #endif // _stat
  14. #endif // _WIN32
  15. extern bool print_debug_info, serve_cache_on_fetch_fail;
  16. extern int global_log_level;
  17. typedef std::lock_guard<std::mutex> guarded_mutex;
  18. std::mutex cache_rw_lock;
  19. //std::string user_agent_str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36";
  20. std::string user_agent_str = "subconverter/" + std::string(VERSION) + " cURL/" + std::string(LIBCURL_VERSION);
  21. static inline void curl_init()
  22. {
  23. static bool init = false;
  24. if(!init)
  25. {
  26. curl_global_init(CURL_GLOBAL_ALL);
  27. init = true;
  28. }
  29. }
  30. static int writer(char *data, size_t size, size_t nmemb, std::string *writerData)
  31. {
  32. if(writerData == NULL)
  33. return 0;
  34. writerData->append(data, size*nmemb);
  35. return size * nmemb;
  36. }
  37. static int size_checker(void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
  38. {
  39. if(dltotal > 1048576.0)
  40. return 1;
  41. return 0;
  42. }
  43. static inline void curl_set_common_options(CURL *curl_handle, const char *url)
  44. {
  45. curl_easy_setopt(curl_handle, CURLOPT_URL, url);
  46. curl_easy_setopt(curl_handle, CURLOPT_VERBOSE, global_log_level == LOG_LEVEL_VERBOSE ? 1L : 0L);
  47. curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0L);
  48. curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1L);
  49. curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
  50. curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0L);
  51. curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0L);
  52. curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, 15L);
  53. curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, user_agent_str.data());
  54. curl_easy_setopt(curl_handle, CURLOPT_MAXFILESIZE, 1048576L);
  55. curl_easy_setopt(curl_handle, CURLOPT_PROGRESSFUNCTION, size_checker);
  56. }
  57. static std::string curlGet(const std::string &url, const std::string &proxy, std::string &response_headers, CURLcode &return_code)
  58. {
  59. CURL *curl_handle;
  60. std::string data;
  61. long retVal = 0;
  62. curl_init();
  63. curl_handle = curl_easy_init();
  64. curl_set_common_options(curl_handle, url.data());
  65. curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, writer);
  66. curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &data);
  67. curl_easy_setopt(curl_handle, CURLOPT_HEADERFUNCTION, writer);
  68. curl_easy_setopt(curl_handle, CURLOPT_HEADERDATA, &response_headers);
  69. if(proxy.size())
  70. curl_easy_setopt(curl_handle, CURLOPT_PROXY, proxy.data());
  71. return_code = curl_easy_perform(curl_handle);
  72. curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CODE, &retVal);
  73. curl_easy_cleanup(curl_handle);
  74. if(return_code != CURLE_OK || retVal != 200)
  75. data.clear();
  76. data.shrink_to_fit();
  77. return data;
  78. }
  79. // data:[<mediatype>][;base64],<data>
  80. static std::string dataGet(const std::string &url)
  81. {
  82. if (!startsWith(url, "data:"))
  83. return "";
  84. std::string::size_type comma = url.find(',');
  85. if (comma == std::string::npos)
  86. return "";
  87. std::string data = UrlDecode(url.substr(comma));
  88. if (endsWith(url.substr(0, comma), ";base64")) {
  89. return urlsafe_base64_decode(data);
  90. } else {
  91. return data;
  92. }
  93. }
  94. std::string buildSocks5ProxyString(const std::string &addr, int port, const std::string &username, const std::string &password)
  95. {
  96. std::string authstr = username.size() && password.size() ? username + ":" + password + "@" : "";
  97. std::string proxystr = "socks5://" + authstr + addr + ":" + std::to_string(port);
  98. return proxystr;
  99. }
  100. std::string webGet(const std::string &url, const std::string &proxy, std::string &response_headers, unsigned int cache_ttl)
  101. {
  102. std::string content;
  103. CURLcode return_code;
  104. if (startsWith(url, "data:"))
  105. return dataGet(url);
  106. // cache system
  107. if(cache_ttl > 0)
  108. {
  109. md("cache");
  110. const std::string url_md5 = getMD5(url);
  111. const std::string path = "cache/" + url_md5, path_header = path + "_header";
  112. struct stat result;
  113. if(stat(path.data(), &result) == 0) // cache exist
  114. {
  115. time_t mtime = result.st_mtime, now = time(NULL); // get cache modified time and current time
  116. if(difftime(now, mtime) <= cache_ttl) // within TTL
  117. {
  118. writeLog(0, "CACHE HIT: '" + url + "', using local cache.");
  119. guarded_mutex guard(cache_rw_lock);
  120. response_headers = fileGet(path_header, true);
  121. return fileGet(path, true);
  122. }
  123. writeLog(0, "CACHE MISS: '" + url + "', TTL timeout, creating new cache."); // out of TTL
  124. }
  125. else
  126. writeLog(0, "CACHE NOT EXIST: '" + url + "', creating new cache.");
  127. content = curlGet(url, proxy, response_headers, return_code); // try to fetch data
  128. if(return_code == CURLE_OK) // success, save new cache
  129. {
  130. guarded_mutex guard(cache_rw_lock);
  131. fileWrite(path, content, true);
  132. fileWrite(path_header, response_headers, true);
  133. }
  134. else
  135. {
  136. if(fileExist(path) && serve_cache_on_fetch_fail) // failed, check if cache exist
  137. {
  138. writeLog(0, "Fetch failed. Serving cached content."); // cache exist, serving cache
  139. guarded_mutex guard(cache_rw_lock);
  140. content = fileGet(path, true);
  141. response_headers = fileGet(path_header, true);
  142. }
  143. else
  144. writeLog(0, "Fetch failed. No local cache available."); // cache not exist or not allow to serve cache, serving nothing
  145. }
  146. return content;
  147. }
  148. return curlGet(url, proxy, response_headers, return_code);
  149. }
  150. std::string webGet(const std::string &url, const std::string &proxy)
  151. {
  152. std::string dummy;
  153. return webGet(url, proxy, dummy);
  154. }
  155. std::string webGet(const std::string &url, const std::string &proxy, unsigned int cache_ttl)
  156. {
  157. std::string dummy;
  158. return webGet(url, proxy, dummy, cache_ttl);
  159. }
  160. int curlPost(const std::string &url, const std::string &data, const std::string &proxy, const string_array &request_headers, std::string *retData)
  161. {
  162. CURL *curl_handle;
  163. CURLcode res;
  164. struct curl_slist *list = NULL;
  165. long retVal = 0;
  166. curl_init();
  167. curl_handle = curl_easy_init();
  168. list = curl_slist_append(list, "Content-Type: application/json;charset='utf-8'");
  169. for(const std::string &x : request_headers)
  170. list = curl_slist_append(list, x.data());
  171. curl_set_common_options(curl_handle, url.data());
  172. curl_easy_setopt(curl_handle, CURLOPT_POST, 1L);
  173. curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, data.data());
  174. curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE, data.size());
  175. curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, writer);
  176. curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, retData);
  177. curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, list);
  178. curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, user_agent_str.data());
  179. if(proxy.size())
  180. curl_easy_setopt(curl_handle, CURLOPT_PROXY, proxy.data());
  181. res = curl_easy_perform(curl_handle);
  182. curl_slist_free_all(list);
  183. if(res == CURLE_OK)
  184. {
  185. res = curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CODE, &retVal);
  186. }
  187. curl_easy_cleanup(curl_handle);
  188. return retVal;
  189. }
  190. int webPost(const std::string &url, const std::string &data, const std::string &proxy, const string_array &request_headers, std::string *retData)
  191. {
  192. return curlPost(url, data, proxy, request_headers, retData);
  193. }
  194. int curlPatch(const std::string &url, const std::string &data, const std::string &proxy, const string_array &request_headers, std::string *retData)
  195. {
  196. CURL *curl_handle;
  197. CURLcode res;
  198. long retVal = 0;
  199. struct curl_slist *list = NULL;
  200. curl_init();
  201. curl_handle = curl_easy_init();
  202. list = curl_slist_append(list, "Content-Type: application/json;charset='utf-8'");
  203. for(const std::string &x : request_headers)
  204. list = curl_slist_append(list, x.data());
  205. curl_set_common_options(curl_handle, url.data());
  206. curl_easy_setopt(curl_handle, CURLOPT_CUSTOMREQUEST, "PATCH");
  207. curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, data.data());
  208. curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE, data.size());
  209. curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, writer);
  210. curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, retData);
  211. curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, list);
  212. curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, user_agent_str.data());
  213. if(proxy.size())
  214. curl_easy_setopt(curl_handle, CURLOPT_PROXY, proxy.data());
  215. res = curl_easy_perform(curl_handle);
  216. curl_slist_free_all(list);
  217. if(res == CURLE_OK)
  218. {
  219. res = curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CODE, &retVal);
  220. }
  221. curl_easy_cleanup(curl_handle);
  222. return retVal;
  223. }
  224. int webPatch(const std::string &url, const std::string &data, const std::string &proxy, const string_array &request_headers, std::string *retData)
  225. {
  226. return curlPatch(url, data, proxy, request_headers, retData);
  227. }
  228. int curlHead(const std::string &url, const std::string &proxy, const string_array &request_headers, std::string &response_headers)
  229. {
  230. CURL *curl_handle;
  231. CURLcode res;
  232. long retVal = 0;
  233. struct curl_slist *list = NULL;
  234. curl_init();
  235. curl_handle = curl_easy_init();
  236. list = curl_slist_append(list, "Content-Type: application/json;charset='utf-8'");
  237. for(const std::string &x : request_headers)
  238. list = curl_slist_append(list, x.data());
  239. curl_set_common_options(curl_handle, url.data());
  240. curl_easy_setopt(curl_handle, CURLOPT_HEADERFUNCTION, writer);
  241. curl_easy_setopt(curl_handle, CURLOPT_HEADERDATA, &response_headers);
  242. curl_easy_setopt(curl_handle, CURLOPT_NOBODY, 1L);
  243. curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, list);
  244. curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, user_agent_str.data());
  245. if(proxy.size())
  246. curl_easy_setopt(curl_handle, CURLOPT_PROXY, proxy.data());
  247. res = curl_easy_perform(curl_handle);
  248. curl_slist_free_all(list);
  249. if(res == CURLE_OK)
  250. res = curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CODE, &retVal);
  251. curl_easy_cleanup(curl_handle);
  252. return retVal;
  253. }
  254. int webHead(const std::string &url, const std::string &proxy, const string_array &request_headers, std::string &response_headers)
  255. {
  256. return curlHead(url, proxy, request_headers, response_headers);
  257. }