Codebase list instaloader / 542bf8a
New upstream version 4.4.5 Joseph O'Gorman 3 years ago
20 changed file(s) with 205 addition(s) and 100 deletion(s). Raw diff Collapse all Expand all
2323 **Additional context**
2424 Add any other context about the problem here.
2525
26 (please also see https://instaloader.github.io/contributing.html for how to report a bug)
26 <!-- please also see https://instaloader.github.io/contributing.html for how to report a bug -->
0 blank_issues_enabled: false
1 contact_links:
2 - name: Contributing to Instaloader
3 url: https://instaloader.github.io/contributing.html
4 about: Guidelines for how to report an issue.
5 - name: Instaloader Troubleshooting
6 url: https://instaloader.github.io/troubleshooting.html
7 about: Many common problems and their workarounds are described here.
8 - name: Instaloader Documentation
9 url: https://instaloader.github.io/
10 about: Many questions might be already answered here.
1414 A clear and concise description of any alternative solutions or features you've considered.
1515
1616 **If the feature request is accepted, would you be willing to submit a PR?**
17
18 Yes / No _(Help can be provided if you need assistance submitting a PR)_
17 Yes / No
18 <!-- Help can be provided if you need assistance submitting a PR -->
1919
2020 **Additional context**
2121 Add any other context about the feature request here.
2222
23 (please also see https://instaloader.github.io/contributing.html for how to suggest a feature)
23 <!-- please also see https://instaloader.github.io/contributing.html for how to suggest a feature -->
55 ---
66
77 Your question here...
8
9 <!-- Please see our documentation: https://instaloader.github.io/ -->
10
11 <!-- Basic questions regarding Instaloader's usage might be more appropriate to ask on Stack Overflow -->
0
1
2 <!--
3 Please describe:
4
5 - A motivation for this change, e.g.
6 - Fixes # .
7 - More general: What problem does the pull request solve?
8
9 - The changes proposed in this pull request
10
11 - The completeness of this change
12 - Is it just a proof of concept?
13 - Is the documentation updated (if appropriate)?
14 - Do you consider it ready to be merged or is it a draft?
15 - Can we help you at some point?
16 -->
0 name: PyLint and MyPy
1
2 on: [push, pull_request]
3
4 jobs:
5 lint:
6 name: PyLint and MyPy
7 runs-on: ubuntu-latest
8 strategy:
9 matrix:
10 python-version: [3.8]
11 steps:
12 - name: Checkout Instaloader Repository
13 uses: actions/checkout@v2
14 with:
15 fetch-depth: 0 # needed for building docs
16 - name: Setup Python
17 uses: actions/setup-python@v2
18 with:
19 python-version: ${{ matrix.python-version }}
20 - name: Install Dependencies
21 run: |
22 python -m pip install pipenv==2020.6.2
23 pipenv sync --dev
24 - name: PyLint
25 run: pipenv run pylint instaloader
26 - name: MyPy
27 run: pipenv run mypy -m instaloader
28 - name: Build Documentation
29 run: pipenv run make -C docs html SPHINXOPTS="-W -n"
66 jobs:
77 stale:
88
9 name: Mark stale issues and pull requests
910 runs-on: ubuntu-latest
1011
1112 steps:
33
44 jobs:
55 build:
6 name: Windows EXE build
67 runs-on: windows-latest
78 steps:
89 - name: Checkout Instaloader repository
2626 repo: instaloader/instaloader.github.io
2727 target-branch: master
2828 on:
29 tags: true
29 branch: master
3030 python: 3.6
3131 - provider: script
3232 script: deploy/arch/deploy.sh $TRAVIS_TAG
1616 .highlight pre {
1717 padding: 0.7em;
1818 color: #fff; }
19 .highlight .c1, .highlight .k, .highlight .kn, .highlight .ow {
19 .highlight .c1 {
20 color: #666; }
21 .highlight .k, .highlight .kn, .highlight .ow {
2022 color: #008d06; }
2123 .highlight .nb, .highlight .ne, .highlight .nf, .highlight .vm {
2224 color: #f48400; }
3535 color: #fff;
3636 }
3737
38 .c1, .k, .kn, .ow {
38 .c1 {
39 color: #666;
40 }
41
42 .k, .kn, .ow {
3943 color: $color_instaloader_main
4044 }
4145
110110 ``profile`` (requires :option:`--login`).
111111
112112 - ``-post``
113 The single **post** with the given shortcode. Must be preceeded by ``--`` in
114 the argument list to not be mistaken as an option flag.
113 Replace **post** with the post's shortcode to download a single post. Must be preceded by ``--`` in
114 the argument list to not be mistaken as an option flag::
115
116 instaloader -- -B_K4CykAOtf
117
118
115119
116120 .. versionadded:: 4.1
117121
44
55 L = instaloader.Instaloader()
66
7 posts = instaloader.Hashtag.from_name(L.context, 'urbanphotography').get_posts()
8 # or
9 # posts = instaloader.Profile.from_username(L.context, PROFILE).get_posts()
7 posts = instaloader.Profile.from_username(L.context, "instagram").get_posts()
108
119 SINCE = datetime(2015, 5, 1)
1210 UNTIL = datetime(2015, 3, 1)
1311
1412 for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, posts)):
1513 print(post.date)
16 L.download_post(post, '#urbanphotography')
14 L.download_post(post, "instagram")
0 from datetime import datetime
1 import instaloader
2
3 L = instaloader.Instaloader()
4
5 posts = instaloader.Hashtag.from_name(L.context, "urbanphotography").get_posts()
6
7 SINCE = datetime(2020, 5, 10) # further from today, inclusive
8 UNTIL = datetime(2020, 5, 11) # closer to today, not inclusive
9
10 k = 0 # initiate k
11 k_list = [] # uncomment this to tune k
12
13 for post in posts:
14 postdate = post.date
15
16 if postdate > UNTIL:
17 continue
18 elif postdate <= SINCE:
19 k += 1
20 if k == 50:
21 break
22 else:
23 continue
24 else:
25 L.download_post(post, "#urbanphotography")
26 k = 0 # set k to 0
27 # if you want to tune k, uncomment below to get your k max
28 #k_list.append(k)
29 #max(k_list)
2727 -----------------------------------
2828
2929 To only download Instagram pictures (and metadata) that are within a specific
30 period, you can play around with :func:`~itertools.dropwhile` and
31 :func:`~itertools.takewhile` from :mod:`itertools` like in this snippet.
30 period, you can simply use :func:`~itertools.dropwhile` and
31 :func:`~itertools.takewhile` from :mod:`itertools` on a generator that returns
32 Posts in **exact chronological order**, such as :meth:`Profile.get_posts`.
3233
3334 .. literalinclude:: codesnippets/121_since_until.py
3435
3536 See also :class:`Post`, :meth:`Instaloader.download_post`.
3637
3738 Discussed in :issue:`121`.
39
40 The code example with :func:`~itertools.dropwhile` and
41 :func:`~itertools.takewhile` makes the assumption that the post iterator returns
42 posts in exact chronological order. As discussed in :issue:`666`, the following
43 approach fits for an **almost chronological order**, where up to *k* older posts
44 are inserted into an otherwise chronological order, such as a Hashtag feed.
45
46 .. literalinclude:: codesnippets/666_historical_hashtag_data.py
3847
3948 Likes of a Profile / Ghost Followers
4049 ------------------------------------
3434 - If not obvious, describe **which behavior you expected**
3535 instead of what actually happened.
3636
37 - If you think an issue has been closed accidentally or inappropriately, feel
38 free to reopen it.
37 - If we have closed an issue apparently inadvertently or inappropriately, please
38 let us know.
3939
4040 Writing Code or Improving the Documentation
4141 -------------------------------------------
4444 `Pull Request <https://github.com/instaloader/instaloader/pulls>`__. There are only
4545 a few things to consider:
4646
47 - Sometimes, the most current code is not in the ``master`` branch. Check that
48 you forked from the most recent branch.
47 - Base your Pull Request on the ``master`` branch if it fixes a bug,
48 or the ``upcoming/v4.X`` branch (if it exists at the moment of submitting the PR)
49 otherwise.
4950
5051 - We use `Pylint <https://www.pylint.org/>`__ for error and syntax checking of
51 the source. The file ``.travis.yml`` in the project's root directory
52 shows how it is invoked. Note that sometimes it might be better to disable a
53 warning rather than adapting the code to Pylint's desires.
52 the source and `MyPy <https://github.com/python/mypy>`__ for type checking.
53 Beware that sometimes it might be better to disable a warning rather than
54 adapting the code to a tool's desires.
5455
5556 - The documentation source is located in the ``docs`` folder. The file
5657 ``cli-options.rst`` is merely an RST-formatted copy of ``instaloader --help``
5758 output, of which the source is in ``instaloader/__main__.py``.
5859
59 - Feel free to contact us, even if you "only" have Proof-of-Concepts or
60 not-fully integrated changes. They already might be an advance for the
61 project.
62
63 Suggesting Features
64 -------------------
60 Proposing Features
61 ------------------
6562
6663 .. goal-start
6764
68 Instaloader's goal is to mimick the browser's behavior to access the data that
65 Instaloader's goal is to mimic the browser's behavior to access the data that
6966 is available through the Instagram web interface, pack this data into complete
7067 and easily-(re)usable python objects, and provide a user interface for the most
7168 common downloading and metadata collection tasks, without changing any of the
7370
7471 .. goal-end
7572
76 If you have an idea of how Instaloader should be enhanced, but do not want to
77 implement the feature yourself, feel free to open a ticket in our
78 `Issue Tracker <https://github.com/instaloader/instaloader/issues>`__.
79 Please consider the following:
73 Prior to spending effort on implementing a new feature, it might be appropriate to
74 clarify how it could fit into the project's scope or discuss implementation
75 details. If you feel the need to do so, please create a "feature suggestion".
8076
8177 - Instaloader already has plenty of features. **Check the documentation**
8278 beforehand to ensure your desired suggestion is not already implemented.
8581 an issue suggesting the same or a similar feature, share your thoughts in a
8682 comment there, instead of opening a new issue.
8783
88 - If possible, provide us a **use case of the feature**: How could the user
89 invoke the new function? Which problem would it solve? If new information is
90 obtained, how would it be further processed?
84 - **Motivate the feature**, i.e.
9185
92 - If not obvious, briefly motivate how your suggested feature **conforms with
93 Instaloader's project goal**.
86 - Provide us a **use case of the feature**: How could the user
87 invoke the new function? Which problem would it solve? If new information is
88 obtained, how would it be further processed?
9489
95 - **Be patient**. Naturally, bugs and pull requests have a higher priority than
96 feature suggestions. Keep in mind that this is a free software project, and
97 unfortunately we only have limited time to work on it.
90 - Describe already-working **alternatives to the feature** and how they
91 compare to your proposed feature.
92
93 - Briefly describe how your suggested feature **conforms with Instaloader's
94 project goal**.
95
96 - Explain your **solution ideas**. Describe your ideas on how the feature could
97 be implemented and the underlying problem could be solved. Also **describe
98 alternatives** that you have considered.
9899
99100 Donations
100101 ---------
2626 happen in normal conditions, consider adjusting the
2727 :option:`--max-connection-attempts` option.
2828
29 There have been observations that services which by their nature offer
30 promiscuous IP addresses, such as cloud, VPN and public proxy services, might be
31 subject to significantly stricter limits for anonymous access. However,
32 logged-in accesses (see :option:`--login`) do not seem to be affected.
33
34 Too many queries in the last time
35 ---------------------------------
36
2937 **"Too many queries in the last time"** is not an error. It is a notice that the
3038 rate limit has almost been reached, according to Instaloader's own rate
3139 accounting mechanism. We regularly adjust this mechanism to match Instagram's
3240 current rate limiting.
41
42 Private but not followed
43 ------------------------
44
45 You have to follow a private account to access most of its associated
46 information.
3347
3448 Login Error
3549 -----------
00 """Download pictures (or videos) along with their captions and other metadata from Instagram."""
11
22
3 __version__ = '4.4.4'
3 __version__ = '4.4.5'
44
55
66 try:
392392 if ((format_string_contains_key(self.dirname_pattern, 'profile') or
393393 format_string_contains_key(self.dirname_pattern, 'target'))):
394394 profile_str = owner_profile.username.lower() if owner_profile is not None else target
395 filename = '{0}/{1}_{2}.{3}'.format(self.dirname_pattern.format(profile=profile_str,
396 target=target),
397 pic_identifier, name_suffix, pic_extension)
395 filename = os.path.join(self.dirname_pattern.format(profile=profile_str,
396 target=target),
397 '{0}_{1}.{2}'.format(pic_identifier, name_suffix, pic_extension))
398398 else:
399 filename = '{0}/{1}_{2}_{3}.{4}'.format(self.dirname_pattern.format(), target,
400 pic_identifier, name_suffix, pic_extension)
399 filename = os.path.join(self.dirname_pattern.format(),
400 '{0}_{1}_{2}.{3}'.format(target, pic_identifier, name_suffix, pic_extension))
401401 content_length = http_response.headers.get('Content-Length', None)
402402 if os.path.isfile(filename) and (not self.context.is_logged_in or
403403 (content_length is not None and
500500 """
501501
502502 dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target)
503 filename = dirname + '/' + self.format_filename(post, target=target)
503 filename = os.path.join(dirname, self.format_filename(post, target=target))
504504 os.makedirs(os.path.dirname(filename), exist_ok=True)
505505
506506 # Download the image(s) / video thumbnail and videos within sidecars if desired
629629
630630 date_local = item.date_local
631631 dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target)
632 filename = dirname + '/' + self.format_filename(item, target=target)
632 filename = os.path.join(dirname, self.format_filename(item, target=target))
633633 os.makedirs(os.path.dirname(filename), exist_ok=True)
634634 downloaded = False
635635 if not item.is_video or self.download_video_thumbnails is True:
973973 def _get_id_filename(self, profile_name: str) -> str:
974974 if ((format_string_contains_key(self.dirname_pattern, 'profile') or
975975 format_string_contains_key(self.dirname_pattern, 'target'))):
976 return '{0}/id'.format(self.dirname_pattern.format(profile=profile_name.lower(),
977 target=profile_name.lower()))
976 return os.path.join(self.dirname_pattern.format(profile=profile_name.lower(),
977 target=profile_name.lower()),
978 'id')
978979 else:
979 return '{0}/{1}_id'.format(self.dirname_pattern.format(), profile_name.lower())
980 return os.path.join(self.dirname_pattern.format(),
981 '{0}_id'.format(profile_name.lower()))
980982
981983 def save_profile_id(self, profile: Profile):
982984 """
10901092
10911093 # Save metadata as JSON if desired.
10921094 if self.save_metadata:
1093 json_filename = '{0}/{1}_{2}'.format(self.dirname_pattern.format(profile=profile_name,
1094 target=profile_name),
1095 profile_name, profile.userid)
1095 json_filename = os.path.join(self.dirname_pattern.format(profile=profile_name,
1096 target=profile_name),
1097 '{0}_{1}'.format(profile_name, profile.userid))
10961098 self.save_metadata_json(json_filename, profile)
10971099
10981100 # Catch some errors
6868 self._node = node
6969 self._owner_profile = owner_profile
7070 self._full_metadata_dict = None # type: Optional[Dict[str, Any]]
71 self._rhx_gis_str = None # type: Optional[str]
7271 self._location = None # type: Optional[PostLocation]
7372 self._iphone_struct_ = None
7473 if 'iphone_struct' in node:
141140
142141 def _obtain_metadata(self):
143142 if not self._full_metadata_dict:
144 pic_json = self._context.get_json("p/{0}/".format(self.shortcode), params={})
145 self._full_metadata_dict = pic_json['entry_data']['PostPage'][0]['graphql']['shortcode_media']
146 self._rhx_gis_str = pic_json.get('rhx_gis')
143 pic_json = self._context.graphql_query(
144 '2b0673e0dc4580674a88d426fe00ea90',
145 {'shortcode': self.shortcode}
146 )
147 self._full_metadata_dict = pic_json['data']['shortcode_media']
147148 if self._full_metadata_dict is None:
148149 # issue #449
149150 self._context.error("Fetching Post metadata failed (issue #449). "
159160 self._obtain_metadata()
160161 assert self._full_metadata_dict is not None
161162 return self._full_metadata_dict
162
163 @property
164 def _rhx_gis(self) -> Optional[str]:
165 self._obtain_metadata()
166 return self._rhx_gis_str
167163
168164 @property
169165 def _iphone_struct(self) -> Dict[str, Any]:
391387 created_at_utc=datetime.utcfromtimestamp(node['created_at']),
392388 text=node['text'],
393389 owner=Profile(self._context, node['owner']),
394 likes_count=node['edge_liked_by']['count'])
390 likes_count=node.get('edge_liked_by', {}).get('count', 0))
395391
396392 def _postcommentanswers(node):
397393 if 'edge_threaded_comments' not in node:
417413 if self.comments == 0:
418414 # Avoid doing additional requests if there are no comments
419415 return
420 try:
421 comment_edges = self._field('edge_media_to_parent_comment', 'edges')
422 answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges])
423 threaded_comments_available = True
424 except KeyError:
425 comment_edges = self._field('edge_media_to_comment', 'edges')
426 answers_count = 0
427 threaded_comments_available = False
416
417 comment_edges = self._field('edge_media_to_comment', 'edges')
418 answers_count = sum([edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges])
428419
429420 if self.comments == len(comment_edges) + answers_count:
430421 # If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them
432423 return
433424 yield from (_postcomment(node) for node in
434425 self._context.graphql_node_list(
435 "97b41c52301f77ce508f55e66d17620e" if threaded_comments_available
436 else "f0986789a5c5d17c2400faebf16efd0d",
426 "97b41c52301f77ce508f55e66d17620e",
437427 {'shortcode': self.shortcode},
438428 'https://www.instagram.com/p/' + self.shortcode + '/',
439 lambda d:
440 d['data']['shortcode_media'][
441 'edge_media_to_parent_comment' if threaded_comments_available else 'edge_media_to_comment'],
442 self._rhx_gis))
429 lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment']))
443430
444431 def get_likes(self) -> Iterator['Profile']:
445432 """Iterate over all likes of the post. A :class:`Profile` instance of each likee is yielded."""
454441 yield from (Profile(self._context, node) for node in
455442 self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
456443 'https://www.instagram.com/p/' + self.shortcode + '/',
457 lambda d: d['data']['shortcode_media']['edge_liked_by'],
458 self._rhx_gis))
444 lambda d: d['data']['shortcode_media']['edge_liked_by']))
459445
460446 @property
461447 def is_sponsored(self) -> bool:
536522 self._has_public_story = None # type: Optional[bool]
537523 self._node = node
538524 self._has_full_metadata = False
539 self._rhx_gis = None
540525 self._iphone_struct_ = None
541526 if 'iphone_struct' in node:
542527 # if loaded from JSON with load_structure_from_file()
598583 def _obtain_metadata(self):
599584 try:
600585 if not self._has_full_metadata:
601 metadata = self._context.get_json('{}/'.format(self.username), params={})
586 metadata = self._context.get_json('{}/feed/'.format(self.username), params={})
602587 self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user']
603588 self._has_full_metadata = True
604 self._rhx_gis = metadata.get('rhx_gis')
605589 except (QueryReturnedNotFoundException, KeyError) as err:
606590 top_search_results = TopSearchResults(self._context, self.username)
607591 similar_profiles = [profile.username for profile in top_search_results.get_profiles()]
734718 'include_reel': False, 'include_suggested_users': False,
735719 'include_logged_out_extras': True,
736720 'include_highlight_reels': False},
737 'https://www.instagram.com/{}/'.format(self.username),
738 self._rhx_gis)
721 'https://www.instagram.com/{}/'.format(self.username))
739722 self._has_public_story = data['data']['user']['has_public_story']
740723 assert self._has_public_story is not None
741724 return self._has_public_story
794777 {'id': self.userid},
795778 'https://www.instagram.com/{0}/'.format(self.username),
796779 lambda d: d['data']['user']['edge_owner_to_timeline_media'],
797 self._rhx_gis,
798 self._metadata('edge_owner_to_timeline_media')))
780 first_data=self._metadata('edge_owner_to_timeline_media')))
799781
800782 def get_saved_posts(self) -> Iterator[Post]:
801783 """Get Posts that are marked as saved by the user."""
809791 {'id': self.userid},
810792 'https://www.instagram.com/{0}/'.format(self.username),
811793 lambda d: d['data']['user']['edge_saved_media'],
812 self._rhx_gis,
813 self._metadata('edge_saved_media')))
794 first_data=self._metadata('edge_saved_media')))
814795
815796 def get_tagged_posts(self) -> Iterator[Post]:
816797 """Retrieve all posts where a profile is tagged.
821802 self._context.graphql_node_list("e31a871f7301132ceaab56507a66bbb7",
822803 {'id': self.userid},
823804 'https://www.instagram.com/{0}/'.format(self.username),
824 lambda d: d['data']['user']['edge_user_to_photos_of_you'],
825 self._rhx_gis))
805 lambda d: d['data']['user']['edge_user_to_photos_of_you']))
826806
827807 def get_igtv_posts(self) -> Iterator[Post]:
828808 """Retrieve all IGTV posts.
834814 {'id': self.userid},
835815 'https://www.instagram.com/{0}/channel/'.format(self.username),
836816 lambda d: d['data']['user']['edge_felix_video_timeline'],
837 self._rhx_gis,
838 self._metadata('edge_felix_video_timeline')))
817 first_data=self._metadata('edge_felix_video_timeline')))
839818
840819 def get_followers(self) -> Iterator['Profile']:
841820 """
849828 self._context.graphql_node_list("37479f2b8209594dde7facb0d904896a",
850829 {'id': str(self.userid)},
851830 'https://www.instagram.com/' + self.username + '/',
852 lambda d: d['data']['user']['edge_followed_by'],
853 self._rhx_gis))
831 lambda d: d['data']['user']['edge_followed_by']))
854832
855833 def get_followees(self) -> Iterator['Profile']:
856834 """
864842 self._context.graphql_node_list("58712303d941c6855d4e888c5f0cd22f",
865843 {'id': str(self.userid)},
866844 'https://www.instagram.com/' + self.username + '/',
867 lambda d: d['data']['user']['edge_follow'],
868 self._rhx_gis))
845 lambda d: d['data']['user']['edge_follow']))
869846
870847 def get_similar_accounts(self) -> Iterator['Profile']:
871848 """
880857 yield from (Profile(self._context, edge["node"]) for edge in
881858 self._context.graphql_query("ad99dd9d3646cc3c0dda65debcd266a7",
882859 {"user_id": str(self.userid), "include_chaining": True},
883 "https://www.instagram.com/{0}/".format(self.username),
884 self._rhx_gis)["data"]["user"]["edge_chaining"]["edges"])
860 "https://www.instagram.com/{0}/"
861 .format(self.username))["data"]["user"]["edge_chaining"]["edges"])
885862
886863
887864 class StoryItem: