webget.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. #include <iostream>
  2. #include <unistd.h>
  3. #include <sys/stat.h>
  4. #include <mutex>
  5. #include <curl/curl.h>
  6. #include "webget.h"
  7. #include "version.h"
  8. #include "misc.h"
  9. #include "logger.h"
  // On Windows builds, alias _stat to stat unless _stat is already a macro.
  #if defined(_WIN32) && !defined(_stat)
  #define _stat stat
  #endif // _WIN32 && !_stat
  15. extern bool print_debug_info, serve_cache_on_fetch_fail;
  16. extern int global_log_level;
  17. typedef std::lock_guard<std::mutex> guarded_mutex;
  18. std::mutex cache_rw_lock;
  19. //std::string user_agent_str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36";
  20. std::string user_agent_str = "subconverter/" VERSION " cURL/" LIBCURL_VERSION;
  21. static inline void curl_init()
  22. {
  23. static bool init = false;
  24. if(!init)
  25. {
  26. curl_global_init(CURL_GLOBAL_ALL);
  27. init = true;
  28. }
  29. }
  // libcurl write/header callback: appends the received chunk to a std::string.
  //
  // data       - pointer to the received bytes (not NUL-terminated)
  // size/nmemb - element size and count; the chunk is size * nmemb bytes long
  // writerData - destination string; when null nothing is stored
  //
  // Returns the number of bytes consumed. libcurl's callback contract uses
  // size_t for this value, so the function returns size_t instead of int to
  // match the expected signature and avoid truncation. Returning 0 for a
  // non-empty chunk (the null-destination case) is reported by libcurl as a
  // write error.
  static size_t writer(char *data, size_t size, size_t nmemb, std::string *writerData)
  {
      if(writerData == nullptr)
          return 0;
      const size_t chunk_bytes = size * nmemb;
      writerData->append(data, chunk_bytes);
      return chunk_bytes;
  }
  // libcurl progress callback that aborts transfers larger than 1 MiB.
  //
  // dltotal - total download size announced for the transfer
  // dlnow   - bytes downloaded so far
  // clientp, ultotal, ulnow - unused
  //
  // Returns 1 (abort the transfer) when either the announced size or the
  // amount already received exceeds the limit, otherwise 0 (continue).
  // Checking dlnow as well matters for responses whose total size is not
  // announced up front: dltotal stays at 0 for those, so a dltotal-only
  // check would never trigger.
  static int size_checker(void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
  {
      (void)clientp;
      (void)ultotal;
      (void)ulnow;
      const double max_download_bytes = 1048576.0;
      if(dltotal > max_download_bytes || dlnow > max_download_bytes)
          return 1;
      return 0;
  }
  43. static inline void curl_set_common_options(CURL *curl_handle, const char *url)
  44. {
  45. curl_easy_setopt(curl_handle, CURLOPT_URL, url);
  46. curl_easy_setopt(curl_handle, CURLOPT_VERBOSE, global_log_level == LOG_LEVEL_VERBOSE ? 1L : 0L);
  47. curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0L);
  48. curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1L);
  49. curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
  50. curl_easy_setopt(curl_handle, CURLOPT_MAXREDIRS, 20L);
  51. curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0L);
  52. curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0L);
  53. curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, 15L);
  54. curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, user_agent_str.data());
  55. curl_easy_setopt(curl_handle, CURLOPT_MAXFILESIZE, 1048576L);
  56. curl_easy_setopt(curl_handle, CURLOPT_PROGRESSFUNCTION, size_checker);
  57. }
  58. static std::string curlGet(const std::string &url, const std::string &proxy, std::string &response_headers, CURLcode &return_code)
  59. {
  60. CURL *curl_handle;
  61. std::string data, new_url = url;
  62. struct curl_slist *list = NULL;
  63. long retVal = 0;
  64. curl_init();
  65. curl_handle = curl_easy_init();
  66. if(proxy.size())
  67. {
  68. if(startsWith(proxy, "cors:"))
  69. {
  70. list = curl_slist_append(list, "X-Requested-With: subconverter " VERSION);
  71. curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, list);
  72. new_url = proxy.substr(5) + url;
  73. }
  74. else
  75. curl_easy_setopt(curl_handle, CURLOPT_PROXY, proxy.data());
  76. }
  77. curl_set_common_options(curl_handle, new_url.data());
  78. curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, writer);
  79. curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &data);
  80. curl_easy_setopt(curl_handle, CURLOPT_HEADERFUNCTION, writer);
  81. curl_easy_setopt(curl_handle, CURLOPT_HEADERDATA, &response_headers);
  82. unsigned int fail_count = 0, max_fails = 1;
  83. while(true)
  84. {
  85. return_code = curl_easy_perform(curl_handle);
  86. if(return_code == CURLE_OK || max_fails >= fail_count)
  87. break;
  88. else
  89. fail_count++;
  90. }
  91. curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CODE, &retVal);
  92. curl_easy_cleanup(curl_handle);
  93. if(return_code != CURLE_OK || retVal != 200)
  94. data.clear();
  95. data.shrink_to_fit();
  96. return data;
  97. }
  98. // data:[<mediatype>][;base64],<data>
  99. static std::string dataGet(const std::string &url)
  100. {
  101. if (!startsWith(url, "data:"))
  102. return std::string();
  103. std::string::size_type comma = url.find(',');
  104. if (comma == std::string::npos || comma == url.size() - 1)
  105. return std::string();
  106. std::string data = UrlDecode(url.substr(comma + 1));
  107. if (endsWith(url.substr(0, comma), ";base64")) {
  108. return urlsafe_base64_decode(data);
  109. } else {
  110. return data;
  111. }
  112. }
  // Builds a "socks5://[user:pass@]addr:port" proxy URL.
  // Credentials are included only when both username and password are
  // non-empty.
  std::string buildSocks5ProxyString(const std::string &addr, int port, const std::string &username, const std::string &password)
  {
      std::string proxy_url = "socks5://";
      const bool with_credentials = !username.empty() && !password.empty();
      if(with_credentials)
      {
          proxy_url += username;
          proxy_url += ":";
          proxy_url += password;
          proxy_url += "@";
      }
      proxy_url += addr;
      proxy_url += ":";
      proxy_url += std::to_string(port);
      return proxy_url;
  }
  119. std::string webGet(const std::string &url, const std::string &proxy, std::string &response_headers, unsigned int cache_ttl)
  120. {
  121. std::string content;
  122. CURLcode return_code;
  123. if (startsWith(url, "data:"))
  124. return dataGet(url);
  125. // cache system
  126. if(cache_ttl > 0)
  127. {
  128. md("cache");
  129. const std::string url_md5 = getMD5(url);
  130. const std::string path = "cache/" + url_md5, path_header = path + "_header";
  131. struct stat result;
  132. if(stat(path.data(), &result) == 0) // cache exist
  133. {
  134. time_t mtime = result.st_mtime, now = time(NULL); // get cache modified time and current time
  135. if(difftime(now, mtime) <= cache_ttl) // within TTL
  136. {
  137. writeLog(0, "CACHE HIT: '" + url + "', using local cache.");
  138. guarded_mutex guard(cache_rw_lock);
  139. response_headers = fileGet(path_header, true);
  140. return fileGet(path, true);
  141. }
  142. writeLog(0, "CACHE MISS: '" + url + "', TTL timeout, creating new cache."); // out of TTL
  143. }
  144. else
  145. writeLog(0, "CACHE NOT EXIST: '" + url + "', creating new cache.");
  146. content = curlGet(url, proxy, response_headers, return_code); // try to fetch data
  147. if(return_code == CURLE_OK) // success, save new cache
  148. {
  149. guarded_mutex guard(cache_rw_lock);
  150. fileWrite(path, content, true);
  151. fileWrite(path_header, response_headers, true);
  152. }
  153. else
  154. {
  155. if(fileExist(path) && serve_cache_on_fetch_fail) // failed, check if cache exist
  156. {
  157. writeLog(0, "Fetch failed. Serving cached content."); // cache exist, serving cache
  158. guarded_mutex guard(cache_rw_lock);
  159. content = fileGet(path, true);
  160. response_headers = fileGet(path_header, true);
  161. }
  162. else
  163. writeLog(0, "Fetch failed. No local cache available."); // cache not exist or not allow to serve cache, serving nothing
  164. }
  165. return content;
  166. }
  167. return curlGet(url, proxy, response_headers, return_code);
  168. }
  169. std::string webGet(const std::string &url, const std::string &proxy)
  170. {
  171. std::string dummy;
  172. return webGet(url, proxy, dummy);
  173. }
  174. std::string webGet(const std::string &url, const std::string &proxy, unsigned int cache_ttl)
  175. {
  176. std::string dummy;
  177. return webGet(url, proxy, dummy, cache_ttl);
  178. }
  179. int curlPost(const std::string &url, const std::string &data, const std::string &proxy, const string_array &request_headers, std::string *retData)
  180. {
  181. CURL *curl_handle;
  182. CURLcode res;
  183. struct curl_slist *list = NULL;
  184. long retVal = 0;
  185. curl_init();
  186. curl_handle = curl_easy_init();
  187. list = curl_slist_append(list, "Content-Type: application/json;charset='utf-8'");
  188. for(const std::string &x : request_headers)
  189. list = curl_slist_append(list, x.data());
  190. curl_set_common_options(curl_handle, url.data());
  191. curl_easy_setopt(curl_handle, CURLOPT_POST, 1L);
  192. curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, data.data());
  193. curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE, data.size());
  194. curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, writer);
  195. curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, retData);
  196. curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, list);
  197. curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, user_agent_str.data());
  198. if(proxy.size())
  199. curl_easy_setopt(curl_handle, CURLOPT_PROXY, proxy.data());
  200. res = curl_easy_perform(curl_handle);
  201. curl_slist_free_all(list);
  202. if(res == CURLE_OK)
  203. {
  204. res = curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CODE, &retVal);
  205. }
  206. curl_easy_cleanup(curl_handle);
  207. return retVal;
  208. }
  209. int webPost(const std::string &url, const std::string &data, const std::string &proxy, const string_array &request_headers, std::string *retData)
  210. {
  211. return curlPost(url, data, proxy, request_headers, retData);
  212. }
  213. int curlPatch(const std::string &url, const std::string &data, const std::string &proxy, const string_array &request_headers, std::string *retData)
  214. {
  215. CURL *curl_handle;
  216. CURLcode res;
  217. long retVal = 0;
  218. struct curl_slist *list = NULL;
  219. curl_init();
  220. curl_handle = curl_easy_init();
  221. list = curl_slist_append(list, "Content-Type: application/json;charset='utf-8'");
  222. for(const std::string &x : request_headers)
  223. list = curl_slist_append(list, x.data());
  224. curl_set_common_options(curl_handle, url.data());
  225. curl_easy_setopt(curl_handle, CURLOPT_CUSTOMREQUEST, "PATCH");
  226. curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, data.data());
  227. curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE, data.size());
  228. curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, writer);
  229. curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, retData);
  230. curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, list);
  231. curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, user_agent_str.data());
  232. if(proxy.size())
  233. curl_easy_setopt(curl_handle, CURLOPT_PROXY, proxy.data());
  234. res = curl_easy_perform(curl_handle);
  235. curl_slist_free_all(list);
  236. if(res == CURLE_OK)
  237. {
  238. res = curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CODE, &retVal);
  239. }
  240. curl_easy_cleanup(curl_handle);
  241. return retVal;
  242. }
  243. int webPatch(const std::string &url, const std::string &data, const std::string &proxy, const string_array &request_headers, std::string *retData)
  244. {
  245. return curlPatch(url, data, proxy, request_headers, retData);
  246. }
  247. int curlHead(const std::string &url, const std::string &proxy, const string_array &request_headers, std::string &response_headers)
  248. {
  249. CURL *curl_handle;
  250. CURLcode res;
  251. long retVal = 0;
  252. struct curl_slist *list = NULL;
  253. curl_init();
  254. curl_handle = curl_easy_init();
  255. list = curl_slist_append(list, "Content-Type: application/json;charset='utf-8'");
  256. for(const std::string &x : request_headers)
  257. list = curl_slist_append(list, x.data());
  258. curl_set_common_options(curl_handle, url.data());
  259. curl_easy_setopt(curl_handle, CURLOPT_HEADERFUNCTION, writer);
  260. curl_easy_setopt(curl_handle, CURLOPT_HEADERDATA, &response_headers);
  261. curl_easy_setopt(curl_handle, CURLOPT_NOBODY, 1L);
  262. curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, list);
  263. curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, user_agent_str.data());
  264. if(proxy.size())
  265. curl_easy_setopt(curl_handle, CURLOPT_PROXY, proxy.data());
  266. res = curl_easy_perform(curl_handle);
  267. curl_slist_free_all(list);
  268. if(res == CURLE_OK)
  269. res = curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CODE, &retVal);
  270. curl_easy_cleanup(curl_handle);
  271. return retVal;
  272. }
  273. int webHead(const std::string &url, const std::string &proxy, const string_array &request_headers, std::string &response_headers)
  274. {
  275. return curlHead(url, proxy, request_headers, response_headers);
  276. }