# Source code for github2pandas.issues

import pandas as pd
from pathlib import Path
import os
import github

from .utility import Utility

class Issues:
    """
    Class to aggregate Issues

    All members are class constants or static methods, so the class is
    used as a namespace and does not need to be instantiated.

    Attributes
    ----------
    ISSUES_DIR : str
        Issues dir where all files are saved in.
    ISSUES : str
        Pandas table file for issues data.
    ISSUES_COMMENTS : str
        Pandas table file for comments data in issues.
    ISSUES_REACTIONS : str
        Pandas table file for reactions data in issues.
    ISSUES_EVENTS : str
        Pandas table file for events data in issues.

    Methods
    -------
    extract_issue_data(issue, users_ids, data_root_dir)
        Extracting general issue data.
    generate_issue_pandas_tables(repo, data_root_dir, reactions=False, check_for_updates=True)
        Extracting the complete issue data from a repository.
    get_issues(data_root_dir, filename=ISSUES)
        Get a generated pandas table.

    """
    ISSUES_DIR = "Issues"
    ISSUES = "pdIssues.p"
    ISSUES_COMMENTS = "pdIssuesComments.p"
    ISSUES_REACTIONS = "pdIssuesReactions.p"
    ISSUES_EVENTS = "pdIssuesEvents.p"

    @staticmethod
    def extract_issue_data(issue, users_ids, data_root_dir):
        """
        extract_issue_data(issue, users_ids, data_root_dir)

        Extracting general issue data.

        Parameters
        ----------
        issue : Issue
            Issue object from pygithub.
        users_ids : dict
            Dict of User Ids as Keys and anonym Ids as Value.
        data_root_dir : str
            Data root directory for the repository.

        Returns
        -------
        dict
            Dictionary with the extracted general issue data. The keys
            ``closed_by`` and ``author`` are only present when the
            corresponding attribute is set on the issue object.

        Notes
        -----
            PyGithub Issue object structure: https://pygithub.readthedocs.io/en/latest/github_objects/Issue.html

        """

        not_set = github.GithubObject.NotSet

        # The Utility calls stay in their original order (assignees,
        # closed_by, labels, author); SOURCE does not show whether they
        # have order-dependent side effects, so the order is not changed.
        issue_data = {}
        issue_data["assignees"] = Utility.extract_assignees(issue.assignees, users_ids, data_root_dir)
        issue_data["body"] = issue.body
        issue_data["closed_at"] = issue.closed_at
        # The private attribute is inspected before the public property is
        # read -- presumably to skip issues that carry no such value
        # without triggering a lazy load (TODO confirm against PyGithub).
        if issue._closed_by != not_set:
            issue_data["closed_by"] = Utility.extract_user_data(issue.closed_by, users_ids, data_root_dir)
        issue_data["created_at"] = issue.created_at
        issue_data["id"] = issue.id
        issue_data["labels"] = Utility.extract_labels(issue.labels)
        issue_data["state"] = issue.state
        issue_data["title"] = issue.title
        issue_data["updated_at"] = issue.updated_at
        if issue._user != not_set:
            issue_data["author"] = Utility.extract_user_data(issue.user, users_ids, data_root_dir)
        return issue_data

    @staticmethod
    def _get_real_issues(repo):
        """Return all issues of ``repo`` (any state) that are not pull requests."""
        # The issue listing also contains pull requests; an entry whose
        # pull_request attribute is unset is treated as a plain issue.
        return [
            issue
            for issue in repo.get_issues(state='all')
            if issue._pull_request == github.GithubObject.NotSet
        ]

    @staticmethod
    def generate_issue_pandas_tables(repo, data_root_dir, reactions=False, check_for_updates=True):
        """
        generate_issue_pandas_tables(repo, data_root_dir, reactions=False, check_for_updates=True)

        Extracting the complete issue data from a repository.

        The issues, their comments and their events are written as pandas
        tables into the ``ISSUES_DIR`` folder below ``data_root_dir``.
        The reactions table is only written when ``reactions`` is True.

        Parameters
        ----------
        repo : Repository
            Repository object from pygithub.
        data_root_dir : str
            Data root directory for the repository.
        reactions : bool, default=False
            If reactions should also be extracted. This iterates the
            reactions of every issue and of every issue comment, which
            significantly increases the aggregation time.
        check_for_updates : bool, default=True
            Check first if there are any new issues information. If no
            update is detected, nothing is extracted or saved.

        Notes
        -----
            PyGithub Repository object structure: https://pygithub.readthedocs.io/en/latest/github_objects/Repository.html

        """

        # Fetch and filter the issue listing once; the same list feeds both
        # the update check and the extraction below, instead of requesting
        # the complete listing from the repository a second time.
        issues = Issues._get_real_issues(repo)

        if check_for_updates:
            old_issues = Issues.get_issues(data_root_dir)
            if not Utility.check_for_updates(issues, old_issues):
                return

        issues_dir = Path(data_root_dir, Issues.ISSUES_DIR)
        users_ids = Utility.get_users_ids(data_root_dir)
        issue_list = []
        issue_comment_list = []
        issue_event_list = []
        issue_reaction_list = []
        for issue in issues:
            # issue data
            issue_list.append(Issues.extract_issue_data(issue, users_ids, data_root_dir))
            # issue comment data
            for comment in issue.get_comments():
                issue_comment_list.append(
                    Utility.extract_comment_data(comment, issue.id, "issue", users_ids, data_root_dir)
                )
                # issue comment reaction data
                if reactions:
                    for reaction in comment.get_reactions():
                        issue_reaction_list.append(
                            Utility.extract_reaction_data(reaction, comment.id, "comment", users_ids, data_root_dir)
                        )
            # issue event data
            for event in issue.get_events():
                issue_event_list.append(
                    Utility.extract_event_data(event, issue.id, "issue", users_ids, data_root_dir)
                )
            # issue reaction data
            if reactions:
                for reaction in issue.get_reactions():
                    issue_reaction_list.append(
                        Utility.extract_reaction_data(reaction, issue.id, "issue", users_ids, data_root_dir)
                    )
        # Save lists
        Utility.save_list_to_pandas_table(issues_dir, Issues.ISSUES, issue_list)
        Utility.save_list_to_pandas_table(issues_dir, Issues.ISSUES_COMMENTS, issue_comment_list)
        Utility.save_list_to_pandas_table(issues_dir, Issues.ISSUES_EVENTS, issue_event_list)
        if reactions:
            Utility.save_list_to_pandas_table(issues_dir, Issues.ISSUES_REACTIONS, issue_reaction_list)

    @staticmethod
    def get_issues(data_root_dir, filename=ISSUES):
        """
        get_issues(data_root_dir, filename=ISSUES)

        Get a generated pandas table.

        Parameters
        ----------
        data_root_dir : str
            Data root directory for the repository.
        filename : str, default=ISSUES
            Pandas table file for issues or comments or reactions or events data.

        Returns
        -------
        DataFrame
            Pandas DataFrame with the stored data, or an empty DataFrame
            if the requested file does not exist.

        """

        pd_issues_file = Path(data_root_dir, Issues.ISSUES_DIR, filename)
        if not pd_issues_file.is_file():
            return pd.DataFrame()
        # NOTE(security): unpickling can execute arbitrary code. Only point
        # data_root_dir at directories whose pickle files come from a
        # trusted source.
        return pd.read_pickle(pd_issues_file)