libzypp  17.31.8
MediaMultiCurl.cc
1 /*---------------------------------------------------------------------\
2 |                          ____ _   __ __ ___                          |
3 |                         |__ / \ / / . \ . \                          |
4 |                           / / \ V /|  _/  _/                         |
5 |                          / /__ | | | |   | |                         |
6 |                         /_____||_| |_|   |_|                         |
7 |                                                                      |
8 \---------------------------------------------------------------------*/
13 #include <ctype.h>
14 #include <sys/types.h>
15 #include <signal.h>
16 #include <sys/wait.h>
17 #include <netdb.h>
18 #include <arpa/inet.h>
19 
20 #include <vector>
21 #include <iostream>
22 #include <algorithm>
23 
24 
25 #include <zypp/ZConfig.h>
26 #include <zypp/base/Logger.h>
27 #include <zypp/media/MediaMultiCurl.h>
28 #include <zypp-curl/parser/MetaLinkParser>
29 #include <zypp/ManagedFile.h>
30 #include <zypp-media/auth/CredentialManager>
31 #include <zypp-curl/auth/CurlAuthData>
32 
33 using std::endl;
34 using namespace zypp::base;
35 
36 #undef CURLVERSION_AT_LEAST
37 #define CURLVERSION_AT_LEAST(M,N,O) LIBCURL_VERSION_NUM >= ((((M)<<8)+(N))<<8)+(O)
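// LIBCURL_VERSION_NUM packs the version as 0xMMmmpp (major<<16 | minor<<8 | patch),
// which is exactly what ((((M)<<8)+(N))<<8)+(O) computes; e.g. 7.15.5 packs to
// 0x070F05, so a plain numeric >= comparison orders versions correctly.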
38 
39 namespace zypp {
40  namespace media {
41 
42 
44 
45 
46 class multifetchrequest;
47 
48 // Hack: we derive from MediaCurl just to get the storage space for
49 // settings, url, curlerrors and the like
50 
51 class multifetchworker : MediaCurl {
52  friend class multifetchrequest;
53 
54 public:
55  multifetchworker(int no, multifetchrequest &request, const Url &url);
56  ~multifetchworker();
57  void nextjob();
58  void run();
59  bool checkChecksum();
60  bool recheckChecksum();
61  void disableCompetition();
62 
63  void checkdns();
64  void adddnsfd(fd_set &rset, int &maxfd);
65  void dnsevent(fd_set &rset);
66 
67  int _workerno;
68 
69  int _state;
70  bool _competing;
71 
72  size_t _blkno;
73  off_t _blkstart;
74  size_t _blksize;
75  bool _noendrange;
76 
77  double _blkstarttime;
78  size_t _blkreceived;
79  off_t _received;
80 
81  double _avgspeed;
82  double _maxspeed;
83 
84  double _sleepuntil;
85 
86 private:
87  void stealjob();
88 
89  size_t writefunction(void *ptr, size_t size);
90  static size_t _writefunction(void *ptr, size_t size, size_t nmemb, void *stream);
91 
92  size_t headerfunction(char *ptr, size_t size);
93  static size_t _headerfunction(void *ptr, size_t size, size_t nmemb, void *stream);
94 
95  multifetchrequest *_request;
96  int _pass;
97  std::string _urlbuf;
98  off_t _off;
99  size_t _size;
100  Digest _dig;
101 
102  pid_t _pid;
103  int _dnspipe;
104 };
105 
106 #define WORKER_STARTING 0
107 #define WORKER_LOOKUP 1
108 #define WORKER_FETCH 2
109 #define WORKER_DISCARD 3
110 #define WORKER_DONE 4
111 #define WORKER_SLEEP 5
112 #define WORKER_BROKEN 6
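// Worker life cycle: STARTING -> LOOKUP (asynchronous DNS check) -> FETCH -> DONE,
// with detours to SLEEP (parked because faster mirrors exist), DISCARD (another
// worker already delivered this block) and BROKEN (permanent error, worker retired).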
113 
114 
115 
116 class multifetchrequest {
117 public:
118  multifetchrequest(const MediaMultiCurl *context, const Pathname &filename, const Url &baseurl, CURLM *multi, FILE *fp, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize);
119  ~multifetchrequest();
120 
121  void run(std::vector<Url> &urllist);
122 
123 protected:
124 
125  static size_t makeBlksize ( size_t filesize );
126 
127  friend class multifetchworker;
128 
129  const MediaMultiCurl *_context;
130  const Pathname _filename;
131  Url _baseurl;
132 
133  FILE *_fp;
134  callback::SendReport<DownloadProgressReport> *_report;
135  MediaBlockList *_blklist;
136  off_t _filesize;
137 
138  CURLM *_multi;
139 
140  std::list<multifetchworker *> _workers;
141  bool _stealing;
142  bool _havenewjob;
143 
144  size_t _blkno;
145  size_t _defaultBlksize = 0; //< The blocksize to use if the metalink file does not specify one
146  off_t _blkoff;
147  size_t _activeworkers;
148  size_t _lookupworkers;
149  size_t _sleepworkers;
150  double _minsleepuntil;
151  bool _finished;
152  off_t _totalsize;
153  off_t _fetchedsize;
154  off_t _fetchedgoodsize;
155 
156  double _starttime;
157  double _lastprogress;
158 
159  double _lastperiodstart;
160  double _lastperiodfetched;
161  double _periodavg;
162 
163 public:
164  double _timeout;
165  double _connect_timeout;
166  double _maxspeed;
167  int _maxworkers;
168 };
169 
170 constexpr auto MIN_REQ_MIRRS = 4;
171 constexpr auto MAXURLS = 10;
172 
174 
175 static double
176 currentTime()
177 {
178  struct timeval tv;
179  if (gettimeofday(&tv, NULL))
180  return 0;
181  return tv.tv_sec + tv.tv_usec / 1000000.;
182 }
183 
184 size_t
185 multifetchworker::writefunction(void *ptr, size_t size)
186 {
187  size_t len, cnt;
188  if (_state == WORKER_BROKEN)
189  return size ? 0 : 1;
190 
191  double now = currentTime();
192 
193  len = size > _size ? _size : size;
194  if (!len)
195  {
196  // kill this job?
197  return size;
198  }
199 
200  if (_blkstart && _off == _blkstart)
201  {
202  // make sure that the server replied with "partial content"
203  // for http requests
204  char *effurl;
205  (void)curl_easy_getinfo(_curl, CURLINFO_EFFECTIVE_URL, &effurl);
206  if (effurl && !strncasecmp(effurl, "http", 4))
207  {
208  long statuscode = 0;
209  (void)curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &statuscode);
210  if (statuscode != 206)
211  return size ? 0 : 1;
212  }
213  }
214 
215  _blkreceived += len;
216  _received += len;
217 
218  _request->_lastprogress = now;
219 
220  if (_state == WORKER_DISCARD || !_request->_fp)
221  {
222  // block is no longer needed
223  // still calculate the checksum so that we can throw out bad servers
224  if (_request->_blklist)
225  _dig.update((const char *)ptr, len);
226  _off += len;
227  _size -= len;
228  return size;
229  }
230  if (fseeko(_request->_fp, _off, SEEK_SET))
231  return size ? 0 : 1;
232  cnt = fwrite(ptr, 1, len, _request->_fp);
233  if (cnt > 0)
234  {
235  _request->_fetchedsize += cnt;
236  if (_request->_blklist)
237  _dig.update((const char *)ptr, cnt);
238  _off += cnt;
239  _size -= cnt;
240  if (cnt == len)
241  return size;
242  }
243  return cnt;
244 }
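// Note on the return values above: libcurl treats a write callback that returns
// anything other than the number of bytes it was handed as a write error and
// aborts the transfer (CURLE_WRITE_ERROR). "return size ? 0 : 1" therefore
// always signals an error, even for a zero-length write.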
245 
246 size_t
247 multifetchworker::_writefunction(void *ptr, size_t size, size_t nmemb, void *stream)
248 {
249  multifetchworker *me = reinterpret_cast<multifetchworker *>(stream);
250  return me->writefunction(ptr, size * nmemb);
251 }
252 
253 size_t
254 multifetchworker::headerfunction(char *p, size_t size)
255 {
256  size_t l = size;
257  if (l > 9 && !strncasecmp(p, "Location:", 9))
258  {
259  std::string line(p + 9, l - 9);
260  if (line[l - 10] == '\r')
261  line.erase(l - 10, 1);
262  XXX << "#" << _workerno << ": redirecting to" << line << endl;
263  return size;
264  }
265  if (l <= 14 || l >= 128 || strncasecmp(p, "Content-Range:", 14) != 0)
266  return size;
267  p += 14;
268  l -= 14;
269  while (l && (*p == ' ' || *p == '\t'))
270  p++, l--;
271  if (l < 6 || strncasecmp(p, "bytes", 5))
272  return size;
273  p += 5;
274  l -= 5;
275  char buf[128];
276  memcpy(buf, p, l);
277  buf[l] = 0;
278  unsigned long long start, off, filesize;
279  if (sscanf(buf, "%llu-%llu/%llu", &start, &off, &filesize) != 3)
280  return size;
281  if (_request->_filesize == (off_t)-1)
282  {
283  WAR << "#" << _workerno << ": setting request filesize to " << filesize << endl;
284  _request->_filesize = filesize;
285  if (_request->_totalsize == 0 && !_request->_blklist)
286  _request->_totalsize = filesize;
287  }
288  if (_request->_filesize != (off_t)filesize)
289  {
290  XXX << "#" << _workerno << ": filesize mismatch" << endl;
291  _state = WORKER_BROKEN;
292  strncpy(_curlError, "filesize mismatch", CURL_ERROR_SIZE);
293  }
294  return size;
295 }
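// A "Content-Range: bytes START-END/TOTAL" reply is the only place the real file
// size can be learned when neither the metalink data nor the caller supplied one;
// the constructor below installs this header callback exactly in that situation.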
296 
297 size_t
298 multifetchworker::_headerfunction(void *ptr, size_t size, size_t nmemb, void *stream)
299 {
300  multifetchworker *me = reinterpret_cast<multifetchworker *>(stream);
301  return me->headerfunction((char *)ptr, size * nmemb);
302 }
303 
304 multifetchworker::multifetchworker(int no, multifetchrequest &request, const Url &url)
305 : MediaCurl(url, Pathname())
306 {
307  _workerno = no;
308  _request = &request;
309  _state = WORKER_STARTING;
310  _competing = false;
311  _off = _blkstart = 0;
312  _size = _blksize = 0;
313  _pass = 0;
314  _blkno = 0;
315  _pid = 0;
316  _dnspipe = -1;
317  _blkreceived = 0;
318  _received = 0;
319  _blkstarttime = 0;
320  _avgspeed = 0;
321  _sleepuntil = 0;
322  _maxspeed = request._maxspeed;
323  _noendrange = false;
324 
325  Url curlUrl( clearQueryString(url) );
326  _urlbuf = curlUrl.asString();
327  _curl = _request->_context->fromEasyPool(_url.getHost());
328  if (_curl)
329  XXX << "reused worker from pool" << endl;
330  if (!_curl && !(_curl = curl_easy_init()))
331  {
332  _state = WORKER_BROKEN;
333  strncpy(_curlError, "curl_easy_init failed", CURL_ERROR_SIZE);
334  return;
335  }
336  try
337  {
338  setupEasy();
339  }
340  catch (Exception &ex)
341  {
342  curl_easy_cleanup(_curl);
343  _curl = 0;
344  _state = WORKER_BROKEN;
345  strncpy(_curlError, "curl_easy_setopt failed", CURL_ERROR_SIZE);
346  return;
347  }
348  curl_easy_setopt(_curl, CURLOPT_PRIVATE, this);
349  curl_easy_setopt(_curl, CURLOPT_URL, _urlbuf.c_str());
350  curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, &_writefunction);
351  curl_easy_setopt(_curl, CURLOPT_WRITEDATA, this);
352  if (_request->_filesize == off_t(-1) || !_request->_blklist || !_request->_blklist->haveChecksum(0))
353  {
354  curl_easy_setopt(_curl, CURLOPT_HEADERFUNCTION, &_headerfunction);
355  curl_easy_setopt(_curl, CURLOPT_HEADERDATA, this);
356  }
357  // if this is the same host copy authorization
358  // (the host check is also what curl does when doing a redirect)
359  // (note also that unauthorized exceptions are thrown with the request host)
360  if (url.getHost() == _request->_context->_url.getHost())
361  {
362  _settings.setUsername(_request->_context->_settings.username());
363  _settings.setPassword(_request->_context->_settings.password());
364  _settings.setAuthType(_request->_context->_settings.authType());
365  if ( _settings.userPassword().size() )
366  {
367  curl_easy_setopt(_curl, CURLOPT_USERPWD, _settings.userPassword().c_str());
368  std::string use_auth = _settings.authType();
369  if (use_auth.empty())
370  use_auth = "digest,basic"; // our default
371  long auth = CurlAuthData::auth_type_str2long(use_auth);
372  if( auth != CURLAUTH_NONE)
373  {
374  XXX << "#" << _workerno << ": Enabling HTTP authentication methods: " << use_auth
375  << " (CURLOPT_HTTPAUTH=" << auth << ")" << std::endl;
376  curl_easy_setopt(_curl, CURLOPT_HTTPAUTH, auth);
377  }
378  }
379  }
380  checkdns();
381 }
382 
383 multifetchworker::~multifetchworker()
384 {
385  if (_curl)
386  {
387  if (_state == WORKER_FETCH || _state == WORKER_DISCARD)
388  curl_multi_remove_handle(_request->_multi, _curl);
389  if (_state == WORKER_DONE || _state == WORKER_SLEEP)
390  {
391 #if CURLVERSION_AT_LEAST(7,15,5)
392  curl_easy_setopt(_curl, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)0);
393 #endif
394  curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
395  curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, (void *)0);
396  curl_easy_setopt(_curl, CURLOPT_WRITEDATA, (void *)0);
397  curl_easy_setopt(_curl, CURLOPT_HEADERFUNCTION, (void *)0);
398  curl_easy_setopt(_curl, CURLOPT_HEADERDATA, (void *)0);
399  _request->_context->toEasyPool(_url.getHost(), _curl);
400  }
401  else
402  curl_easy_cleanup(_curl);
403  _curl = 0;
404  }
405  if (_pid)
406  {
407  kill(_pid, SIGKILL);
408  int status;
409  while (waitpid(_pid, &status, 0) == -1)
410  if (errno != EINTR)
411  break;
412  _pid = 0;
413  }
414  if (_dnspipe != -1)
415  {
416  close(_dnspipe);
417  _dnspipe = -1;
418  }
419  // the destructor in MediaCurl doesn't call disconnect() if
420  // the media is not attached, so we do it here manually
421  disconnectFrom();
422 }
423 
424 static inline bool env_isset(std::string name)
425 {
426  const char *s = getenv(name.c_str());
427  return s && *s ? true : false;
428 }
429 
430 void
431 multifetchworker::checkdns()
432 {
433  std::string host = _url.getHost();
434 
435  if (host.empty())
436  return;
437 
438  if (_request->_context->isDNSok(host))
439  return;
440 
441  // no need to do dns checking for numeric hosts
442  char addrbuf[128];
443  if (inet_pton(AF_INET, host.c_str(), addrbuf) == 1)
444  return;
445  if (inet_pton(AF_INET6, host.c_str(), addrbuf) == 1)
446  return;
447 
448  // no need to do dns checking if we use a proxy
449  if (!_settings.proxy().empty())
450  return;
451  if (env_isset("all_proxy") || env_isset("ALL_PROXY"))
452  return;
453  std::string schemeproxy = _url.getScheme() + "_proxy";
454  if (env_isset(schemeproxy))
455  return;
456  if (schemeproxy != "http_proxy")
457  {
458  std::transform(schemeproxy.begin(), schemeproxy.end(), schemeproxy.begin(), ::toupper);
459  if (env_isset(schemeproxy))
460  return;
461  }
462 
463  XXX << "checking DNS lookup of " << host << endl;
464  int pipefds[2];
465  if (pipe(pipefds))
466  {
467  _state = WORKER_BROKEN;
468  strncpy(_curlError, "DNS pipe creation failed", CURL_ERROR_SIZE);
469  return;
470  }
471  _pid = fork();
472  if (_pid == pid_t(-1))
473  {
474  close(pipefds[0]);
475  close(pipefds[1]);
476  _pid = 0;
477  _state = WORKER_BROKEN;
478  strncpy(_curlError, "DNS checker fork failed", CURL_ERROR_SIZE);
479  return;
480  }
481  else if (_pid == 0)
482  {
483  close(pipefds[0]);
484  // XXX: close all other file descriptors
485  struct addrinfo *ai, aihints;
486  memset(&aihints, 0, sizeof(aihints));
487  aihints.ai_family = PF_UNSPEC;
488  int tstsock = socket(PF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0);
489  if (tstsock == -1)
490  aihints.ai_family = PF_INET;
491  else
492  close(tstsock);
493  aihints.ai_socktype = SOCK_STREAM;
494  aihints.ai_flags = AI_CANONNAME;
495  unsigned int connecttimeout = _request->_connect_timeout;
496  if (connecttimeout)
497  alarm(connecttimeout);
498  signal(SIGALRM, SIG_DFL);
499  if (getaddrinfo(host.c_str(), NULL, &aihints, &ai))
500  _exit(1);
501  _exit(0);
502  }
503  close(pipefds[1]);
504  _dnspipe = pipefds[0];
505  _state = WORKER_LOOKUP;
506 }
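// Rationale: getaddrinfo() runs in a forked child so a slow or broken resolver
// cannot stall the transfer loop. The parent never reads from the pipe; it only
// select()s on it, and the read end becomes ready (EOF) once the child exits.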
507 
508 void
509 multifetchworker::adddnsfd(fd_set &rset, int &maxfd)
510 {
511  if (_state != WORKER_LOOKUP)
512  return;
513  FD_SET(_dnspipe, &rset);
514  if (maxfd < _dnspipe)
515  maxfd = _dnspipe;
516 }
517 
518 void
519 multifetchworker::dnsevent(fd_set &rset)
520 {
521 
522  if (_state != WORKER_LOOKUP || !FD_ISSET(_dnspipe, &rset))
523  return;
524  int status;
525  while (waitpid(_pid, &status, 0) == -1)
526  {
527  if (errno != EINTR)
528  return;
529  }
530  _pid = 0;
531  if (_dnspipe != -1)
532  {
533  close(_dnspipe);
534  _dnspipe = -1;
535  }
536  if (!WIFEXITED(status))
537  {
538  _state = WORKER_BROKEN;
539  strncpy(_curlError, "DNS lookup failed", CURL_ERROR_SIZE);
540  _request->_activeworkers--;
541  return;
542  }
543  int exitcode = WEXITSTATUS(status);
544  XXX << "#" << _workerno << ": DNS lookup returned " << exitcode << endl;
545  if (exitcode != 0)
546  {
547  _state = WORKER_BROKEN;
548  strncpy(_curlError, "DNS lookup failed", CURL_ERROR_SIZE);
549  _request->_activeworkers--;
550  return;
551  }
552  _request->_context->setDNSok(_url.getHost());
553  nextjob();
554 }
555 
556 bool
557 multifetchworker::checkChecksum()
558 {
559  // XXX << "checkChecksum block " << _blkno << endl;
560  if (!_blksize || !_request->_blklist)
561  return true;
562  return _request->_blklist->verifyDigest(_blkno, _dig);
563 }
564 
565 bool
566 multifetchworker::recheckChecksum()
567 {
568  // XXX << "recheckChecksum block " << _blkno << endl;
569  if (!_request->_fp || !_blksize || !_request->_blklist)
570  return true;
571  if (fseeko(_request->_fp, _blkstart, SEEK_SET))
572  return false;
573  char buf[4096];
574  size_t l = _blksize;
575  _request->_blklist->createDigest(_dig); // resets digest
576  while (l)
577  {
578  size_t cnt = l > sizeof(buf) ? sizeof(buf) : l;
579  if (fread(buf, cnt, 1, _request->_fp) != 1)
580  return false;
581  _dig.update(buf, cnt);
582  l -= cnt;
583  }
584  return _request->_blklist->verifyDigest(_blkno, _dig);
585 }
586 
587 
588 void
589 multifetchworker::stealjob()
590 {
591  if (!_request->_stealing)
592  {
593  XXX << "start stealing!" << endl;
594  _request->_stealing = true;
595  }
596  multifetchworker *best = 0;
597  std::list<multifetchworker *>::iterator workeriter = _request->_workers.begin();
598  double now = 0;
599  for (; workeriter != _request->_workers.end(); ++workeriter)
600  {
601  multifetchworker *worker = *workeriter;
602  if (worker == this)
603  continue;
604  if (worker->_pass == -1)
605  continue; // do not steal!
606  if (worker->_state == WORKER_DISCARD || worker->_state == WORKER_DONE || worker->_state == WORKER_SLEEP || !worker->_blksize)
607  continue; // do not steal finished jobs
608  if (!worker->_avgspeed && worker->_blkreceived)
609  {
610  if (!now)
611  now = currentTime();
612  if (now > worker->_blkstarttime)
613  worker->_avgspeed = worker->_blkreceived / (now - worker->_blkstarttime);
614  }
615  if (!best || best->_pass > worker->_pass)
616  {
617  best = worker;
618  continue;
619  }
620  if (best->_pass < worker->_pass)
621  continue;
622  // if it is the same block, we want to know the best worker, otherwise the worst
623  if (worker->_blkstart == best->_blkstart)
624  {
625  if ((worker->_blksize - worker->_blkreceived) * best->_avgspeed < (best->_blksize - best->_blkreceived) * worker->_avgspeed)
626  best = worker;
627  }
628  else
629  {
630  if ((worker->_blksize - worker->_blkreceived) * best->_avgspeed > (best->_blksize - best->_blkreceived) * worker->_avgspeed)
631  best = worker;
632  }
633  }
634  if (!best)
635  {
636  _state = WORKER_DONE;
637  _request->_activeworkers--;
638  _request->_finished = true;
639  return;
640  }
641  // do not sleep twice
642  if (_state != WORKER_SLEEP)
643  {
644  if (!_avgspeed && _blkreceived)
645  {
646  if (!now)
647  now = currentTime();
648  if (now > _blkstarttime)
649  _avgspeed = _blkreceived / (now - _blkstarttime);
650  }
651 
652  // let's see if we should sleep a bit
653  XXX << "me #" << _workerno << ": " << _avgspeed << ", size " << best->_blksize << endl;
654  XXX << "best #" << best->_workerno << ": " << best->_avgspeed << ", size " << (best->_blksize - best->_blkreceived) << endl;
655  if (_avgspeed && best->_avgspeed && best->_blksize - best->_blkreceived > 0 &&
656  (best->_blksize - best->_blkreceived) * _avgspeed < best->_blksize * best->_avgspeed)
657  {
658  if (!now)
659  now = currentTime();
660  double sl = (best->_blksize - best->_blkreceived) / best->_avgspeed * 2;
661  if (sl > 1)
662  sl = 1;
663  XXX << "#" << _workerno << ": going to sleep for " << sl * 1000 << " ms" << endl;
664  _sleepuntil = now + sl;
665  _state = WORKER_SLEEP;
666  _request->_sleepworkers++;
667  return;
668  }
669  }
670 
671  _competing = true;
672  best->_competing = true;
673  _blkstart = best->_blkstart;
674  _blksize = best->_blksize;
675  best->_pass++;
676  _pass = best->_pass;
677  _blkno = best->_blkno;
678  run();
679 }
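// After a steal, two or more workers race on the same byte range. The _pass
// counter spreads later steals across different blocks, and disableCompetition()
// below demotes the losers to WORKER_DISCARD once one of them finishes.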
680 
681 void
682 multifetchworker::disableCompetition()
683 {
684  std::list<multifetchworker *>::iterator workeriter = _request->_workers.begin();
685  for (; workeriter != _request->_workers.end(); ++workeriter)
686  {
687  multifetchworker *worker = *workeriter;
688  if (worker == this)
689  continue;
690  if (worker->_blkstart == _blkstart)
691  {
692  if (worker->_state == WORKER_FETCH)
693  worker->_state = WORKER_DISCARD;
694  worker->_pass = -1; /* do not steal this one, we already have it */
695  }
696  }
697 }
698 
699 
700 void
701 multifetchworker::nextjob()
702 {
703  _noendrange = false;
704  if (_request->_stealing)
705  {
706  stealjob();
707  return;
708  }
709 
710  MediaBlockList *blklist = _request->_blklist;
711  if (!blklist)
712  {
713  _blksize = _request->_defaultBlksize;
714  if (_request->_filesize != off_t(-1))
715  {
716  if (_request->_blkoff >= _request->_filesize)
717  {
718  stealjob();
719  return;
720  }
721  _blksize = _request->_filesize - _request->_blkoff;
722  if (_blksize > _request->_defaultBlksize)
723  _blksize = _request->_defaultBlksize;
724  }
725  DBG << "No BLOCKLIST falling back to chunk size: " << _request->_defaultBlksize << std::endl;
726  }
727  else
728  {
729  MediaBlock blk = blklist->getBlock(_request->_blkno);
730  while (_request->_blkoff >= (off_t)(blk.off + blk.size))
731  {
732  if (++_request->_blkno == blklist->numBlocks())
733  {
734  stealjob();
735  return;
736  }
737  blk = blklist->getBlock(_request->_blkno);
738  _request->_blkoff = blk.off;
739  }
740  _blksize = blk.off + blk.size - _request->_blkoff;
741  if (_blksize > _request->_defaultBlksize && !blklist->haveChecksum(_request->_blkno)) {
742  DBG << "Block: "<< _request->_blkno << " has no checksum falling back to default blocksize: " << _request->_defaultBlksize << std::endl;
743  _blksize = _request->_defaultBlksize;
744  }
745  }
746  _blkno = _request->_blkno;
747  _blkstart = _request->_blkoff;
748  _request->_blkoff += _blksize;
749  run();
750 }
751 
752 void
753 multifetchworker::run()
754 {
755  char rangebuf[128];
756 
757  if (_state == WORKER_BROKEN || _state == WORKER_DONE)
758  return; // just in case...
759  if (_noendrange)
760  sprintf(rangebuf, "%llu-", (unsigned long long)_blkstart);
761  else
762  sprintf(rangebuf, "%llu-%llu", (unsigned long long)_blkstart, (unsigned long long)_blkstart + _blksize - 1);
763  XXX << "#" << _workerno << ": BLK " << _blkno << ":" << rangebuf << " " << _url << endl;
764  if (curl_easy_setopt(_curl, CURLOPT_RANGE, !_noendrange || _blkstart != 0 ? rangebuf : (char *)0) != CURLE_OK)
765  {
766  _request->_activeworkers--;
767  _state = WORKER_BROKEN;
768  strncpy(_curlError, "curl_easy_setopt range failed", CURL_ERROR_SIZE);
769  return;
770  }
771  if (curl_multi_add_handle(_request->_multi, _curl) != CURLM_OK)
772  {
773  _request->_activeworkers--;
774  _state = WORKER_BROKEN;
775  strncpy(_curlError, "curl_multi_add_handle failed", CURL_ERROR_SIZE);
776  return;
777  }
778  _request->_havenewjob = true;
779  _off = _blkstart;
780  _size = _blksize;
781  if (_request->_blklist)
782  _request->_blklist->createDigest(_dig); // resets digest
783  _state = WORKER_FETCH;
784 
785  double now = currentTime();
786  _blkstarttime = now;
787  _blkreceived = 0;
788 }
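// CURLOPT_RANGE takes a plain string like "0-262143". The open-ended "START-"
// form (or no range at all when starting at offset 0) is the fallback for
// servers that mishandle an explicit end offset; see the HTTP 416 handling in
// multifetchrequest::run().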
789 
790 
792 
793 
794 multifetchrequest::multifetchrequest(const MediaMultiCurl *context, const Pathname &filename, const Url &baseurl, CURLM *multi, FILE *fp, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize) : _context(context), _filename(filename), _baseurl(baseurl)
795 {
796  _fp = fp;
797  _report = report;
798  _blklist = blklist;
799  _filesize = filesize;
800  _defaultBlksize = makeBlksize( filesize );
801  _multi = multi;
802  _stealing = false;
803  _havenewjob = false;
804  _blkno = 0;
805  if (_blklist)
806  _blkoff = _blklist->getBlock(0).off;
807  else
808  _blkoff = 0;
809  _activeworkers = 0;
810  _lookupworkers = 0;
811  _sleepworkers = 0;
812  _minsleepuntil = 0;
813  _finished = false;
814  _fetchedsize = 0;
815  _fetchedgoodsize = 0;
816  _totalsize = 0;
817  _lastperiodstart = _lastprogress = _starttime = currentTime();
818  _lastperiodfetched = 0;
819  _periodavg = 0;
820  _timeout = 0;
821  _connect_timeout = 0;
822  _maxspeed = 0;
823  _maxworkers = 0;
824  if (blklist)
825  {
826  for (size_t blkno = 0; blkno < blklist->numBlocks(); blkno++)
827  {
828  MediaBlock blk = blklist->getBlock(blkno);
829  _totalsize += blk.size;
830  }
831  }
832  else if (filesize != off_t(-1))
833  _totalsize = filesize;
834 }
835 
836 multifetchrequest::~multifetchrequest()
837 {
838  for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
839  {
840  multifetchworker *worker = *workeriter;
841  *workeriter = NULL;
842  delete worker;
843  }
844  _workers.clear();
845 }
846 
847 void
848 multifetchrequest::run(std::vector<Url> &urllist)
849 {
850  int workerno = 0;
851  std::vector<Url>::iterator urliter = urllist.begin();
852  for (;;)
853  {
854  fd_set rset, wset, xset;
855  int maxfd, nqueue;
856 
857  if (_finished)
858  {
859  XXX << "finished!" << endl;
860  break;
861  }
862 
863  if ((int)_activeworkers < _maxworkers && urliter != urllist.end() && _workers.size() < MAXURLS)
864  {
865  // spawn another worker!
866  multifetchworker *worker = new multifetchworker(workerno++, *this, *urliter);
867  _workers.push_back(worker);
868  if (worker->_state != WORKER_BROKEN)
869  {
870  _activeworkers++;
871  if (worker->_state != WORKER_LOOKUP)
872  {
873  worker->nextjob();
874  }
875  else
876  _lookupworkers++;
877  }
878  ++urliter;
879  continue;
880  }
881  if (!_activeworkers)
882  {
883  WAR << "No more active workers!" << endl;
884  // show the first worker error we find
885  for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
886  {
887  if ((*workeriter)->_state != WORKER_BROKEN)
888  continue;
889  ZYPP_THROW(MediaCurlException(_baseurl, "Server error", (*workeriter)->_curlError));
890  }
891  break;
892  }
893 
894  FD_ZERO(&rset);
895  FD_ZERO(&wset);
896  FD_ZERO(&xset);
897 
898  curl_multi_fdset(_multi, &rset, &wset, &xset, &maxfd);
899 
900  if (_lookupworkers)
901  for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
902  (*workeriter)->adddnsfd(rset, maxfd);
903 
904  timeval tv;
905  // if we added a new job we have to call multi_perform once
906  // to make it show up in the fd set. do not sleep in this case.
907  tv.tv_sec = 0;
908  tv.tv_usec = _havenewjob ? 0 : 200000;
909  if (_sleepworkers && !_havenewjob)
910  {
911  if (_minsleepuntil == 0)
912  {
913  for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
914  {
915  multifetchworker *worker = *workeriter;
916  if (worker->_state != WORKER_SLEEP)
917  continue;
918  if (!_minsleepuntil || _minsleepuntil > worker->_sleepuntil)
919  _minsleepuntil = worker->_sleepuntil;
920  }
921  }
922  double sl = _minsleepuntil - currentTime();
923  if (sl < 0)
924  {
925  sl = 0;
926  _minsleepuntil = 0;
927  }
928  if (sl < .2)
929  tv.tv_usec = sl * 1000000;
930  }
931  int r = select(maxfd + 1, &rset, &wset, &xset, &tv);
932  if (r == -1 && errno != EINTR)
933  ZYPP_THROW(MediaCurlException(_baseurl, "select() failed", "unknown error"));
934  if (r != 0 && _lookupworkers)
935  for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
936  {
937  multifetchworker *worker = *workeriter;
938  if (worker->_state != WORKER_LOOKUP)
939  continue;
940  (*workeriter)->dnsevent(rset);
941  if (worker->_state != WORKER_LOOKUP)
942  _lookupworkers--;
943  }
944  _havenewjob = false;
945 
946  // run curl
947  for (;;)
948  {
949  CURLMcode mcode;
950  int tasks;
951  mcode = curl_multi_perform(_multi, &tasks);
952  if (mcode == CURLM_CALL_MULTI_PERFORM)
953  continue;
954  if (mcode != CURLM_OK)
955  ZYPP_THROW(MediaCurlException(_baseurl, "curl_multi_perform", "unknown error"));
956  break;
957  }
958 
959  double now = currentTime();
960 
961  // update periodavg
962  if (now > _lastperiodstart + .5)
963  {
964  if (!_periodavg)
965  _periodavg = (_fetchedsize - _lastperiodfetched) / (now - _lastperiodstart);
966  else
967  _periodavg = (_periodavg + (_fetchedsize - _lastperiodfetched) / (now - _lastperiodstart)) / 2;
968  _lastperiodfetched = _fetchedsize;
969  _lastperiodstart = now;
970  }
971 
972  // wake up sleepers
973  if (_sleepworkers)
974  {
975  for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
976  {
977  multifetchworker *worker = *workeriter;
978  if (worker->_state != WORKER_SLEEP)
979  continue;
980  if (worker->_sleepuntil > now)
981  continue;
982  if (_minsleepuntil == worker->_sleepuntil)
983  _minsleepuntil = 0;
984  XXX << "#" << worker->_workerno << ": sleep done, wake up" << endl;
985  _sleepworkers--;
986  // nextjob changes the state
987  worker->nextjob();
988  }
989  }
990 
991  // collect all curl results, reschedule new jobs
992  CURLMsg *msg;
993  while ((msg = curl_multi_info_read(_multi, &nqueue)) != 0)
994  {
995  if (msg->msg != CURLMSG_DONE)
996  continue;
997  CURL *easy = msg->easy_handle;
998  CURLcode cc = msg->data.result;
999  multifetchworker *worker;
1000  if (curl_easy_getinfo(easy, CURLINFO_PRIVATE, &worker) != CURLE_OK)
1001  ZYPP_THROW(MediaCurlException(_baseurl, "curl_easy_getinfo", "unknown error"));
1002  if (worker->_blkreceived && now > worker->_blkstarttime)
1003  {
1004  if (worker->_avgspeed)
1005  worker->_avgspeed = (worker->_avgspeed + worker->_blkreceived / (now - worker->_blkstarttime)) / 2;
1006  else
1007  worker->_avgspeed = worker->_blkreceived / (now - worker->_blkstarttime);
1008  }
1009  XXX << "#" << worker->_workerno << ": BLK " << worker->_blkno << " done code " << cc << " speed " << worker->_avgspeed << endl;
1010  curl_multi_remove_handle(_multi, easy);
1011  if (cc == CURLE_HTTP_RETURNED_ERROR)
1012  {
1013  long statuscode = 0;
1014  (void)curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &statuscode);
1015  XXX << "HTTP status " << statuscode << endl;
1016  if (statuscode == 416 && !_blklist) /* Range error */
1017  {
1018  if (_filesize == off_t(-1))
1019  {
1020  if (!worker->_noendrange)
1021  {
1022  XXX << "#" << worker->_workerno << ": retrying with no end range" << endl;
1023  worker->_noendrange = true;
1024  worker->run();
1025  continue;
1026  }
1027  worker->_noendrange = false;
1028  worker->stealjob();
1029  continue;
1030  }
1031  if (worker->_blkstart >= _filesize)
1032  {
1033  worker->nextjob();
1034  continue;
1035  }
1036  }
1037  }
1038  if (cc == 0)
1039  {
1040  if (!worker->checkChecksum())
1041  {
1042  WAR << "#" << worker->_workerno << ": checksum error, disable worker" << endl;
1043  worker->_state = WORKER_BROKEN;
1044  strncpy(worker->_curlError, "checksum error", CURL_ERROR_SIZE);
1045  _activeworkers--;
1046  continue;
1047  }
1048  if (worker->_state == WORKER_FETCH)
1049  {
1050  if (worker->_competing)
1051  {
1052  worker->disableCompetition();
1053  // multiple workers wrote into this block. We already know that our
1054  // data was correct, but maybe some other worker overwrote our data
1055  // with something broken. Thus we have to re-check the block.
1056  if (!worker->recheckChecksum())
1057  {
1058  XXX << "#" << worker->_workerno << ": recheck checksum error, refetch block" << endl;
1059  // re-fetch! No need to worry about the bad workers,
1060  // they will now be set to DISCARD. At the end of their block
1061  // they will notice that they wrote bad data and go into BROKEN.
1062  worker->run();
1063  continue;
1064  }
1065  }
1066  _fetchedgoodsize += worker->_blksize;
1067  }
1068 
1069  // make bad workers sleep a little
1070  double maxavg = 0;
1071  int maxworkerno = 0;
1072  int numbetter = 0;
1073  for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
1074  {
1075  multifetchworker *oworker = *workeriter;
1076  if (oworker->_state == WORKER_BROKEN)
1077  continue;
1078  if (oworker->_avgspeed > maxavg)
1079  {
1080  maxavg = oworker->_avgspeed;
1081  maxworkerno = oworker->_workerno;
1082  }
1083  if (oworker->_avgspeed > worker->_avgspeed)
1084  numbetter++;
1085  }
1086  if (maxavg && !_stealing)
1087  {
1088  double ratio = worker->_avgspeed / maxavg;
1089  ratio = 1 - ratio;
1090  if (numbetter < 3) // don't sleep that much if we're in the top two
1091  ratio = ratio * ratio;
1092  if (ratio > .01)
1093  {
1094  XXX << "#" << worker->_workerno << ": too slow ("<< ratio << ", " << worker->_avgspeed << ", #" << maxworkerno << ": " << maxavg << "), going to sleep for " << ratio * 1000 << " ms" << endl;
1095  worker->_sleepuntil = now + ratio;
1096  worker->_state = WORKER_SLEEP;
1097  _sleepworkers++;
1098  continue;
1099  }
1100  }
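// The sleep heuristic above parks a worker for (1 - speed/maxspeed) seconds,
// squared while fewer than three workers are faster, so only clearly slow
// mirrors sleep for a noticeable fraction of the one-second maximum.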
1101 
1102  // do rate control (if requested)
1103  // should use periodavg, but that's not what libcurl does
1104  if (_maxspeed && now > _starttime)
1105  {
1106  double avg = _fetchedsize / (now - _starttime);
1107  avg = worker->_maxspeed * _maxspeed / avg;
1108  if (avg < _maxspeed / _maxworkers)
1109  avg = _maxspeed / _maxworkers;
1110  if (avg > _maxspeed)
1111  avg = _maxspeed;
1112  if (avg < 1024)
1113  avg = 1024;
1114  worker->_maxspeed = avg;
1115 #if CURLVERSION_AT_LEAST(7,15,5)
1116  curl_easy_setopt(worker->_curl, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)(avg));
1117 #endif
1118  }
1119 
1120  worker->nextjob();
1121  }
1122  else
1123  {
1124  worker->_state = WORKER_BROKEN;
1125  _activeworkers--;
1126  if (!_activeworkers && !(urliter != urllist.end() && _workers.size() < MAXURLS))
1127  {
1128  // end of workers reached! goodbye!
1129  worker->evaluateCurlCode(Pathname(), cc, false);
1130  }
1131  }
1132 
1133  if ( _filesize > 0 && _fetchedgoodsize > _filesize ) {
1134  ZYPP_THROW(MediaFileSizeExceededException(_baseurl, _filesize));
1135  }
1136  }
1137 
1138  // send report
1139  if (_report)
1140  {
1141  int percent = _totalsize ? (100 * (_fetchedgoodsize + _fetchedsize)) / (_totalsize + _fetchedsize) : 0;
1142 
1143  double avg = 0;
1144  if (now > _starttime)
1145  avg = _fetchedsize / (now - _starttime);
1146  if (!(*(_report))->progress(percent, _baseurl, avg, _lastperiodstart == _starttime ? avg : _periodavg))
1147  ZYPP_THROW(MediaCurlException(_baseurl, "User abort", "cancelled"));
1148  }
1149 
1150  if (_timeout && now - _lastprogress > _timeout)
1151  break;
1152  }
1153 
1154  if (!_finished)
1155  ZYPP_THROW(MediaCurlException(_baseurl, "timeout", ""));
1156 
1157  // print some download stats
1158  WAR << "overall result" << endl;
1159  for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
1160  {
1161  multifetchworker *worker = *workeriter;
1162  WAR << "#" << worker->_workerno << ": state: " << worker->_state << " received: " << worker->_received << " url: " << worker->_url << endl;
1163  }
1164 }
1165 
1166 inline size_t multifetchrequest::makeBlksize ( size_t filesize )
1167 {
1168  // this case should never happen because we never start a multi download if we do not know the filesize beforehand
1169  if ( filesize == 0 ) return 2 * 1024 * 1024;
1170  else if ( filesize < 2*256*1024 ) return filesize;
1171  else if ( filesize < 8*1024*1024 ) return 256*1024;
1172  else if ( filesize < 256*1024*1024 ) return 1024*1024;
1173  return 4*1024*1024;
1174 }
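// Example: a 50 MB file falls into the "< 256 MB" bucket and is fetched in
// 1 MB chunks, i.e. roughly 50 ranged requests spread across the mirror list.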
1175 
1177 
1178 
1179 MediaMultiCurl::MediaMultiCurl(const Url &url_r, const Pathname & attach_point_hint_r)
1180  : MediaCurl(url_r, attach_point_hint_r)
1181 {
1182  MIL << "MediaMultiCurl::MediaMultiCurl(" << url_r << ", " << attach_point_hint_r << ")" << endl;
1183  _multi = 0;
1184  _customHeadersMetalink = 0;
1185 }
1186 
1187 MediaMultiCurl::~MediaMultiCurl()
1188 {
1189  if (_customHeadersMetalink)
1190  {
1191  curl_slist_free_all(_customHeadersMetalink);
1192  _customHeadersMetalink = 0;
1193  }
1194  if (_multi)
1195  {
1196  curl_multi_cleanup(_multi);
1197  _multi = 0;
1198  }
1199  std::map<std::string, CURL *>::iterator it;
1200  for (it = _easypool.begin(); it != _easypool.end(); it++)
1201  {
1202  CURL *easy = it->second;
1203  if (easy)
1204  {
1205  curl_easy_cleanup(easy);
1206  it->second = NULL;
1207  }
1208  }
1209 }
1210 
1211 void MediaMultiCurl::setupEasy()
1212 {
1213  MediaCurl::setupEasy();
1214 
1215  if (_customHeadersMetalink)
1216  {
1217  curl_slist_free_all(_customHeadersMetalink);
1218  _customHeadersMetalink = 0;
1219  }
1220  struct curl_slist *sl = _customHeaders;
1221  for (; sl; sl = sl->next)
1222  _customHeadersMetalink = curl_slist_append(_customHeadersMetalink, sl->data);
1223  _customHeadersMetalink = curl_slist_append(_customHeadersMetalink, "Accept: */*, application/metalink+xml, application/metalink4+xml");
1224 }
1225 
1226 static bool looks_like_metalink_fd(int fd)
1227 {
1228  char buf[256], *p;
1229  int l;
1230  while ((l = pread(fd, buf, sizeof(buf) - 1, (off_t)0)) == -1 && errno == EINTR)
1231  ;
1232  if (l == -1)
1233  return 0;
1234  buf[l] = 0;
1235  p = buf;
1236  while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
1237  p++;
1238  if (!strncasecmp(p, "<?xml", 5))
1239  {
1240  while (*p && *p != '>')
1241  p++;
1242  if (*p == '>')
1243  p++;
1244  while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
1245  p++;
1246  }
1247  bool ret = !strncasecmp(p, "<metalink", 9) ? true : false;
1248  return ret;
1249 }
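// Only the first 255 bytes are inspected: enough to skip an optional <?xml ...?>
// declaration plus whitespace and test whether the root element starts with
// "<metalink" (matching both metalink 3 and metalink 4 documents).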
1250 
1251 static bool looks_like_metalink(const Pathname & file)
1252 {
1253  int fd;
1254  if ((fd = open(file.asString().c_str(), O_RDONLY|O_CLOEXEC)) == -1)
1255  return false;
1256  bool ret = looks_like_metalink_fd(fd);
1257  close(fd);
1258  DBG << "looks_like_metalink(" << file << "): " << ret << endl;
1259  return ret;
1260 }
1261 
1262 // here we try to suppress all progress coming from a metalink download
1263 // bsc#1021291: Nevertheless send alive trigger (without stats), so UIs
1264 // are able to abort a hanging metalink download via callback response.
1265 int MediaMultiCurl::progressCallback( void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
1266 {
1267  CURL *_curl = MediaCurl::progressCallback_getcurl(clientp);
1268  if (!_curl)
1269  return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1270 
1271  // bsc#408814: Don't report any sizes before we have data on disk. Data reported
1272  // due to redirection etc. is not interesting, but may disturb filesize checks.
1273  FILE *fp = 0;
1274  if ( curl_easy_getinfo( _curl, CURLINFO_PRIVATE, &fp ) != CURLE_OK || !fp )
1275  return MediaCurl::aliveCallback( clientp, dltotal, dlnow, ultotal, ulnow );
1276  if ( ftell( fp ) == 0 )
1277  return MediaCurl::aliveCallback( clientp, dltotal, 0.0, ultotal, ulnow );
1278 
1279  // (no longer needed due to the filesize check above?)
1280  // work around curl bug that gives us old data
1281  long httpReturnCode = 0;
1282  if (curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &httpReturnCode ) != CURLE_OK || httpReturnCode == 0)
1283  return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1284 
1285  char *ptr = NULL;
1286  bool ismetalink = false;
1287  if (curl_easy_getinfo(_curl, CURLINFO_CONTENT_TYPE, &ptr) == CURLE_OK && ptr)
1288  {
1289  std::string ct = std::string(ptr);
1290  if (ct.find("application/metalink+xml") == 0 || ct.find("application/metalink4+xml") == 0)
1291  ismetalink = true;
1292  }
1293  if (!ismetalink && dlnow < 256)
1294  {
1295  // can't tell yet, ...
1296  return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1297  }
1298  if (!ismetalink)
1299  {
1300  fflush(fp);
1301  ismetalink = looks_like_metalink_fd(fileno(fp));
1302  DBG << "looks_like_metalink_fd: " << ismetalink << endl;
1303  }
1304  if (ismetalink)
1305  {
1306  // this is a metalink file, change the expected filesize
1307  MediaCurl::resetExpectedFileSize( clientp, ByteCount( 2, ByteCount::MB ) );
1308  // we're downloading the metalink file. Just trigger aliveCallbacks
1309  curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &MediaCurl::aliveCallback);
1310  return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1311  }
1312  curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &MediaCurl::progressCallback);
1313  return MediaCurl::progressCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1314 }
1315 
1316 void MediaMultiCurl::doGetFileCopy( const OnMediaLocation &srcFile , const Pathname & target, callback::SendReport<DownloadProgressReport> & report, RequestOptions options ) const
1317 {
1318  Pathname dest = target.absolutename();
1319  if( assert_dir( dest.dirname() ) )
1320  {
1321  DBG << "assert_dir " << dest.dirname() << " failed" << endl;
1322  ZYPP_THROW( MediaSystemException(getFileUrl(srcFile.filename()), "System error on " + dest.dirname().asString()) );
1323  }
1324 
1325  ManagedFile destNew { target.extend( ".new.zypp.XXXXXX" ) };
1326  AutoFILE file;
1327  {
1328  AutoFREE<char> buf { ::strdup( (*destNew).c_str() ) };
1329  if( ! buf )
1330  {
1331  ERR << "out of memory for temp file name" << endl;
1332  ZYPP_THROW(MediaSystemException(getFileUrl(srcFile.filename()), "out of memory for temp file name"));
1333  }
1334 
1335  AutoFD tmp_fd { ::mkostemp( buf, O_CLOEXEC ) };
1336  if( tmp_fd == -1 )
1337  {
1338  ERR << "mkstemp failed for file '" << destNew << "'" << endl;
1339  ZYPP_THROW(MediaWriteException(destNew));
1340  }
1341  destNew = ManagedFile( (*buf), filesystem::unlink );
1342 
1343  file = ::fdopen( tmp_fd, "we" );
1344  if ( ! file )
1345  {
1346  ERR << "fopen failed for file '" << destNew << "'" << endl;
1347  ZYPP_THROW(MediaWriteException(destNew));
1348  }
1349  tmp_fd.resetDispose(); // don't close it here! ::fdopen moved ownership to file
1350  }
1351 
1352  DBG << "dest: " << dest << endl;
1353  DBG << "temp: " << destNew << endl;
1354 
1355  // set IFMODSINCE time condition (no download if not modified)
1356  if( PathInfo(target).isExist() && !(options & OPTION_NO_IFMODSINCE) )
1357  {
1358  curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
1359  curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, (long)PathInfo(target).mtime());
1360  }
1361  else
1362  {
1363  curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
1364  curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
1365  }
1366  // change header to include Accept: metalink
1367  curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeadersMetalink);
1368  // change to our own progress funcion
1369  curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &progressCallback);
1370  curl_easy_setopt(_curl, CURLOPT_PRIVATE, (*file) ); // important to pass the FILE* explicitly (passing through varargs)
1371  try
1372  {
1373  MediaCurl::doGetFileCopyFile( srcFile, dest, file, report, options );
1374  }
1375  catch (Exception &ex)
1376  {
1377  curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
1378  curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
1379  curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeaders);
1380  curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
1381  ZYPP_RETHROW(ex);
1382  }
1383  curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
1384  curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
1385  curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeaders);
1386  curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
1387  long httpReturnCode = 0;
1388  CURLcode infoRet = curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &httpReturnCode);
1389  if (infoRet == CURLE_OK)
1390  {
1391  DBG << "HTTP response: " + str::numstring(httpReturnCode) << endl;
1392  if ( httpReturnCode == 304
1393  || ( httpReturnCode == 213 && _url.getScheme() == "ftp" ) ) // not modified
1394  {
1395  DBG << "not modified: " << PathInfo(dest) << endl;
1396  return;
1397  }
1398  }
1399  else
1400  {
1401  WAR << "Could not get the response code." << endl;
1402  }
1403 
1404  bool ismetalink = false;
1405 
1406  char *ptr = NULL;
1407  if (curl_easy_getinfo(_curl, CURLINFO_CONTENT_TYPE, &ptr) == CURLE_OK && ptr)
1408  {
1409  std::string ct = std::string(ptr);
1410  if (ct.find("application/metalink+xml") == 0 || ct.find("application/metalink4+xml") == 0)
1411  ismetalink = true;
1412  }
1413 
1414  if (!ismetalink)
1415  {
1416  // some proxies do not store the content type, so also look at the file to find
1417  // out if we received a metalink (bnc#649925)
1418  fflush(file);
1419  if (looks_like_metalink(destNew))
1420  ismetalink = true;
1421  }
1422 
1423  if (ismetalink)
1424  {
1425  bool userabort = false;
1426  Pathname failedFile = ZConfig::instance().repoCachePath() / "MultiCurl.failed";
1427  file = nullptr; // explicitly close destNew before the parser reads it.
1428  try
1429  {
1430  MetaLinkParser mlp;
1431  mlp.parse(destNew);
1432  MediaBlockList bl = mlp.getBlockList();
1433 
1434  /*
1435  * github issue libzypp#277: Multicurl backend breaks with MirrorCache and Metalink with unknown filesize.
1436  * Fall back to a normal download if we have no knowledge about the filesize we want to download.
1437  */
1438  if ( !bl.haveFilesize() && ! srcFile.downloadSize() ) {
1439  XXX << "No filesize in metalink file and no expected filesize, aborting multicurl." << std::endl;
1440  ZYPP_THROW( MediaException("Multicurl requires filesize but none was provided.") );
1441  }
1442 
1443  std::vector<Url> urls = mlp.getUrls();
1444  /*
1445  * bsc#1191609 In certain locations we do not receive a suitable number of metalink mirrors, and might even
1446  * download chunks serially from one and the same server. In those cases we need to fall back to a normal download.
1447  */
1448  if ( urls.size() < MIN_REQ_MIRRS ) {
1449  ZYPP_THROW( MediaException("Multicurl enabled but not enough mirrors provided") );
1450  }
1451 
1452  XXX << bl << endl;
1453  file = fopen((*destNew).c_str(), "w+e");
1454  if (!file)
1455  ZYPP_THROW(MediaWriteException(destNew));
1456  if (PathInfo(target).isExist())
1457  {
1458  XXX << "reusing blocks from file " << target << endl;
1459  bl.reuseBlocks(file, target.asString());
1460  XXX << bl << endl;
1461  }
1462  if (bl.haveChecksum(1) && PathInfo(failedFile).isExist())
1463  {
1464  XXX << "reusing blocks from file " << failedFile << endl;
1465  bl.reuseBlocks(file, failedFile.asString());
1466  XXX << bl << endl;
1467  filesystem::unlink(failedFile);
1468  }
1469  Pathname df = srcFile.deltafile();
1470  if (!df.empty())
1471  {
1472  XXX << "reusing blocks from file " << df << endl;
1473  bl.reuseBlocks(file, df.asString());
1474  XXX << bl << endl;
1475  }
1476  try
1477  {
1478  multifetch(srcFile.filename(), file, &urls, &report, &bl, srcFile.downloadSize());
1479  }
1480  catch (MediaCurlException &ex)
1481  {
1482  userabort = ex.errstr() == "User abort";
1483  ZYPP_RETHROW(ex);
1484  }
1485  }
1486  catch (MediaFileSizeExceededException &ex) {
1487  ZYPP_RETHROW(ex);
1488  }
1489  catch (Exception &ex)
1490  {
1491  // something went wrong. fall back to normal download
1492  file = nullptr; // explicitly close destNew before moving it
1493  if (PathInfo(destNew).size() >= 63336)
1494  {
1495  ::unlink(failedFile.asString().c_str());
1496  filesystem::hardlinkCopy(destNew, failedFile);
1497  }
1498  if (userabort)
1499  {
1500  ZYPP_RETHROW(ex);
1501  }
1502  file = fopen((*destNew).c_str(), "w+e");
1503  if (!file)
1504  ZYPP_THROW(MediaWriteException(destNew));
1505 
1506  // use the default progressCallback
1507  curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &MediaCurl::progressCallback);
1508  MediaCurl::doGetFileCopyFile(srcFile, dest, file, report, options | OPTION_NO_REPORT_START);
1509  }
1510  }
1511 
1512  if (::fchmod( ::fileno(file), filesystem::applyUmaskTo( 0644 )))
1513  {
1514  ERR << "Failed to chmod file " << destNew << endl;
1515  }
1516 
1517  file.resetDispose(); // we're going to close it manually here
1518  if (::fclose(file))
1519  {
1520  filesystem::unlink(destNew);
1521  ERR << "Fclose failed for file '" << destNew << "'" << endl;
1522  ZYPP_THROW(MediaWriteException(destNew));
1523  }
1524 
1525  if ( rename( destNew, dest ) != 0 )
1526  {
1527  ERR << "Rename failed" << endl;
1528  ZYPP_THROW(MediaWriteException(dest));
1529  }
1530  destNew.resetDispose(); // no more need to unlink it
1531 
1532  DBG << "done: " << PathInfo(dest) << endl;
1533 }
1534 
1535 void MediaMultiCurl::multifetch(const Pathname & filename, FILE *fp, std::vector<Url> *urllist, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize) const
1536 {
1537  Url baseurl(getFileUrl(filename));
1538  if (blklist && filesize == off_t(-1) && blklist->haveFilesize())
1539  filesize = blklist->getFilesize();
1540  if (blklist && !blklist->haveBlocks() && filesize != 0)
1541  blklist = 0;
1542  if (blklist && (filesize == 0 || !blklist->numBlocks()))
1543  {
1544  checkFileDigest(baseurl, fp, blklist);
1545  return;
1546  }
1547  if (filesize == 0)
1548  return;
1549  if (!_multi)
1550  {
1551  _multi = curl_multi_init();
1552  if (!_multi)
1553  ZYPP_THROW(MediaCurlInitException(baseurl));
1554  }
1555 
1556  multifetchrequest req(this, filename, baseurl, _multi, fp, report, blklist, filesize);
1557  req._timeout = _settings.timeout();
1558  req._connect_timeout = _settings.connectTimeout();
1559  req._maxspeed = _settings.maxDownloadSpeed();
1560  req._maxworkers = _settings.maxConcurrentConnections();
1561  if (req._maxworkers > MAXURLS)
1562  req._maxworkers = MAXURLS;
1563  if (req._maxworkers <= 0)
1564  req._maxworkers = 1;
1565  std::vector<Url> myurllist;
1566  for (std::vector<Url>::iterator urliter = urllist->begin(); urliter != urllist->end(); ++urliter)
1567  {
1568  try
1569  {
1570  std::string scheme = urliter->getScheme();
1571  if (scheme == "http" || scheme == "https" || scheme == "ftp" || scheme == "tftp")
1572  {
1573  checkProtocol(*urliter);
1574  myurllist.push_back(internal::propagateQueryParams(*urliter, _url));
1575  }
1576  }
1577  catch (...)
1578  {
1579  }
1580  }
1581  if (!myurllist.size())
1582  myurllist.push_back(baseurl);
1583  req.run(myurllist);
1584  checkFileDigest(baseurl, fp, blklist);
1585 }
1586 
1587 void MediaMultiCurl::checkFileDigest(Url &url, FILE *fp, MediaBlockList *blklist) const
1588 {
1589  if (!blklist || !blklist->haveFileChecksum())
1590  return;
1591  if (fseeko(fp, off_t(0), SEEK_SET))
1592  ZYPP_THROW(MediaCurlException(url, "fseeko", "seek error"));
1593  Digest dig;
1594  blklist->createFileDigest(dig);
1595  char buf[4096];
1596  size_t l;
1597  while ((l = fread(buf, 1, sizeof(buf), fp)) > 0)
1598  dig.update(buf, l);
1599  if (!blklist->verifyFileDigest(dig))
1600  ZYPP_THROW(MediaCurlException(url, "file verification failed", "checksum error"));
1601 }
1602 
1603 bool MediaMultiCurl::isDNSok(const std::string &host) const
1604 {
1605  return _dnsok.find(host) == _dnsok.end() ? false : true;
1606 }
1607 
1608 void MediaMultiCurl::setDNSok(const std::string &host) const
1609 {
1610  _dnsok.insert(host);
1611 }
1612 
1613 CURL *MediaMultiCurl::fromEasyPool(const std::string &host) const
1614 {
1615  if (_easypool.find(host) == _easypool.end())
1616  return 0;
1617  CURL *ret = _easypool[host];
1618  _easypool.erase(host);
1619  return ret;
1620 }
1621 
1622 void MediaMultiCurl::toEasyPool(const std::string &host, CURL *easy) const
1623 {
1624  CURL *oldeasy = _easypool[host];
1625  _easypool[host] = easy;
1626  if (oldeasy)
1627  curl_easy_cleanup(oldeasy);
1628 }
1629 
1630  } // namespace media
1631 } // namespace zypp