<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD 2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">EXCLI J</journal-id>
      <journal-title>EXCLI Journal</journal-title>
      <issn pub-type="epub">1611-2156</issn>
      <publisher>
        <publisher-name>Leibniz Research Centre for Working Environment and Human Factors</publisher-name>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">2015-731</article-id>
	  <article-id pub-id-type="doi">10.17179/excli2015-731</article-id>
      <article-id pub-id-type="pii">Doc38</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original article</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>QSAR study of HCV NS5B polymerase inhibitors using the genetic algorithm-multiple linear regression (GA-MLR)</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <name>
            <surname>Rafiei</surname>
            <given-names>Hamid</given-names>
          </name>
          <xref ref-type="aff" rid="A1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Khanzadeh</surname>
            <given-names>Marziyeh</given-names>
          </name>
          <xref ref-type="aff" rid="A2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Mozaffari</surname>
            <given-names>Shahla</given-names>
          </name>
          <xref ref-type="aff" rid="A2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Bostanifar</surname>
            <given-names>Mohammad Hassan</given-names>
          </name>
          <xref ref-type="aff" rid="A1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Avval</surname>
            <given-names>Zhila Mohajeri</given-names>
          </name>
          <xref ref-type="aff" rid="A2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Aalizadeh</surname>
            <given-names>Reza</given-names>
          </name>
          <xref ref-type="aff" rid="A3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Pourbasheer</surname>
            <given-names>Eslam</given-names>
          </name>
          <xref ref-type="corresp" rid="COR1">&#x0002a;</xref>
          <xref ref-type="aff" rid="A2">2</xref>
        </contrib>
      </contrib-group>
      <aff id="A1">
        <label>1</label>Department of Chemistry, Dashtestan Branch, Islamic Azad University, Dashtestan, Iran</aff>
      <aff id="A2">
        <label>2</label>Department of Chemistry, Payame Noor University (PNU), P. O. Box 19395-3697, Tehran, Iran</aff>
      <aff id="A3">
        <label>3</label>Laboratory of Analytical Chemistry, Department of Chemistry, University of Athens, Panepistimiopolis Zografou, 15771 Athens, Greece</aff>
      <author-notes>
        <corresp id="COR1">*To whom correspondence should be addressed: Eslam Pourbasheer, Department of Chemistry, Payame Noor University (PNU), P. O. Box 19395-3697, Tehran, Iran; Tel: +98-45-33519448, Fax: +98-45-33519448, E-mail: <email>pourbasheer@ut.ac.ir</email></corresp>
      </author-notes>
      <pub-date pub-type="epub">
        <day>18</day>
        <month>01</month>
        <year>2016</year>
      </pub-date>
      <pub-date pub-type="collection">
        <year>2016</year>
      </pub-date>
      <volume>15</volume>
      <fpage>38</fpage>
	  <lpage>53</lpage>
      <history>
        <date date-type="received">
          <day>07</day>
          <month>12</month>
          <year>2015</year>
        </date>
        <date date-type="accepted">
          <day>05</day>
          <month>01</month>
          <year>2016</year>
        </date>
      </history>
      <permissions>
        <copyright-statement>Copyright &#xA9; 2016 Rafiei et al.</copyright-statement>
        <copyright-year>2016</copyright-year>
       <license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
          <p>This is an Open Access article distributed under the terms of the Creative Commons Attribution Licence (http://creativecommons.org/licenses/by/4.0/). You are free to copy, distribute and transmit the work, provided the original author and source are credited.</p>
        </license>
      </permissions>
      <self-uri xlink:href="http://www.excli.de/vol15/Pourbasheer_18012016_proof.pdf">This article is available from http://www.excli.de/vol15/Pourbasheer_18012016_proof.pdf</self-uri>
      <abstract><p>A quantitative structure-activity relationship (QSAR) study has been employed for predicting the inhibitory activities of the <bold><italic>Hepatitis C virus (HCV)</italic></bold> <bold><italic>NS5B polymerase inhibitors</italic></bold>. A data set consisting of 72 compounds was selected, and then different types of molecular descriptors were calculated. The whole data set was split into a training set (80 &#x25; of the dataset) and a test set (20 &#x25; of the dataset) using principal component analysis. The stepwise (SW) and the genetic algorithm (GA) techniques were used as variable selection tools. The multiple linear regression method was then used to linearly correlate the selected descriptors with inhibitory activities. Several validation techniques, including leave-one-out and leave-group-out cross-validation and the Y-randomization method, were used to evaluate the internal capability of the derived models. The external prediction ability of the derived models was further analyzed using modified r<sup>2</sup>, concordance correlation coefficient values and the Golbraikh and Tropsha acceptable model criteria. Based on the derived results (GA-MLR), some new insights toward molecular structural requirements for obtaining better inhibitory activity were obtained.</p></abstract>
      <kwd-group>
        <kwd>QSAR</kwd>
        <kwd>genetic algorithms</kwd>
        <kwd>multiple linear regression</kwd>
        <kwd>HCV</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="intro">
      <title>Introduction</title><p>Hepatitis C virus (HCV), identified in 1989 as the etiological agent of parenteral non-A non-B hepatitis, often causes the development of malignant chronic disease, including liver cirrhosis and hepatocellular carcinoma, frequently resulting in death (Alter et al., 1992[<xref ref-type="bibr" rid="R3">3</xref>]; Choo et al., 1989[<xref ref-type="bibr" rid="R8">8</xref>]; Leyssen et al., 2000[<xref ref-type="bibr" rid="R24">24</xref>]). With an estimated 3 &#x25; of the global population infected with HCV, including 4.1 million in the United States alone, and no protective vaccine available at present, this disease has emerged as a serious global health problem (Wasley and Alter, 2000[<xref ref-type="bibr" rid="R45">45</xref>]; Alter et al., 1999[<xref ref-type="bibr" rid="R2">2</xref>]). Although significant advances have been made in the development of treatments for chronic hepatitis C, their efficacy is not universal and only 50 &#x25; success has been reported in achieving a sustained viral response for the current combination therapy with new pegylated (PEG) forms of interferon plus ribavirin (Dillon, 2004[<xref ref-type="bibr" rid="R11">11</xref>]; H&#xFC;gle and Cerny, 2003[<xref ref-type="bibr" rid="R19">19</xref>]; Walker et al., 2003[<xref ref-type="bibr" rid="R43">43</xref>]; Wang and Heinz, 2000[<xref ref-type="bibr" rid="R44">44</xref>]). Moreover, this therapy has considerable liabilities including serious adverse side effects and high cost, thus highlighting the need to develop improved therapeutic options to target HCV infections (Cornberg et al., 2003[<xref ref-type="bibr" rid="R10">10</xref>]). </p><p>HCV is an envelope positive-stranded RNA virus. 
Its single-stranded &#x7E;9.6 kb RNA genome encodes a large polyprotein of &#x7E;3010 amino acids comprising 4 structural proteins (Core, E1, E2, and p7) and 6 nonstructural proteins (NS2, -3, -4A, -4B, -5A, and -5B) (Grakoui et al., 1993[<xref ref-type="bibr" rid="R14">14</xref>]; Hijikata et al., 1991[<xref ref-type="bibr" rid="R17">17</xref>]; Lohmann et al., 1995[<xref ref-type="bibr" rid="R26">26</xref>]). One of the NS proteins, NS5B, an RNA-dependent RNA polymerase (RdRp) is the most studied target for anti-HCV therapy as it is a crucial and unique component of the viral replication machinery (Dillon, 2004[<xref ref-type="bibr" rid="R11">11</xref>]; Kaushik-Basu et al., 2007[<xref ref-type="bibr" rid="R22">22</xref>]; Wang and Heinz, 2000[<xref ref-type="bibr" rid="R44">44</xref>]). NS5B, a 68 kDa membrane-associated protein contains motifs shared by all RdRps in which the catalytic domain is arranged around a central cleft in an organization that resembles a right hand, with the &#x201C;palm&#x201D; &#x201C;finger&#x201D; and &#x201C;thumb&#x201D; subdomains common to polymerases (Bressanelli et al., 2002[<xref ref-type="bibr" rid="R7">7</xref>]; Love et al., 2003[<xref ref-type="bibr" rid="R27">27</xref>]). Recombinant expression of active, soluble NS5B in a variety of systems has been achieved by various C-terminal deletions between 21 and 55 amino acid residues and its biochemical properties investigated (Kaushik-Basu et al., 2007[<xref ref-type="bibr" rid="R22">22</xref>]). All of these reported recombinant HCV RdRps utilize a wide range of RNAs as template <italic>in vitro</italic> without preference, although they do prefer certain homo-polyribonucleotides to others and their activity is stimulated by GTP under specified conditions. Many screening assays for NS5B inhibitors utilize synthetic homopolymeric templates&#x2F;primers. 
NS5B inhibitors thus far identified by these screening procedures can be broadly classified as either nucleoside (NI) or non-nucleoside (NNI) inhibitors (Kaushik-Basu et al., 2007[<xref ref-type="bibr" rid="R22">22</xref>]). </p><p>Quantitative structure-activity relationship (QSAR) studies play a key role in predicting the biological activity of new compounds and provide information that is useful for molecular design and medicinal chemistry (Karbakhsh and Sabet, 2011[<xref ref-type="bibr" rid="R21">21</xref>]; Noorizadeh and Farmany, 2014[<xref ref-type="bibr" rid="R31">31</xref>]). A QSAR model establishes the mathematical relationship between the chemical properties or activities of compounds and their various structural parameters (descriptors) such as topological, physicochemical, stereochemical or electronic indices (Pourbasheer et al., 2014[<xref ref-type="bibr" rid="R33">33</xref>]; Rathod, 2011[<xref ref-type="bibr" rid="R36">36</xref>]). The most important step in building QSAR models is the selection of one or more molecular descriptors that can represent the true interpretation of molecular structure with its activity or properties (Niazi et al., 2006[<xref ref-type="bibr" rid="R30">30</xref>]). Therefore, a validated QSAR model can not only provide valuable information about the effect of fragments in the molecular graph, but can also predict biological activities without performing experiments whose results are not clear in advance. In this contribution, the multiple linear regression (MLR) technique was employed to build QSAR models using the theoretical molecular descriptors selected by stepwise (SW) and genetic algorithm (GA) methods based on the training set compounds (Li et al., 2008[<xref ref-type="bibr" rid="R25">25</xref>]) in order to correlate the biological activities of the selected compounds with their chemical structures. 
</p><p>The primary goal of this work was to develop a new and validated QSAR model, and then to investigate the molecular structural requirements for improving the biological activities based on the derived models.</p></sec>
    <sec sec-type="methods">
      <title>Methodology</title><sec><title>Data set</title><p>In this study, the data set consisting of 72 molecules of Indole 5-carboxamide derivatives along with their experimental inhibitory activities was taken from the literature (Beaulieu et al., 2011[<xref ref-type="bibr" rid="R6">6</xref>][<xref ref-type="bibr" rid="R5">5</xref>]). The chemical structures with their activities are shown in Table 1<xref ref-type="fig" rid="T1">(Tab. 1)</xref>. The inhibitory activity values &#x5B;IC<sub>50</sub> (nM)&#x5D; were converted to the logarithmic scale pIC<sub>50</sub> &#x5B;-log IC<sub>50</sub> (M)&#x5D; so as to give numerically larger values, and then used for the subsequent QSAR analyses. The molecules were divided into two subsets using principal component analysis (PCA), which resulted in a training set containing 59 compounds and a test set containing 13 compounds. The training set was employed to build the model, and the test set was used to evaluate the external prediction ability of the built models. </p></sec><sec><title>Descriptor calculation</title><p>The two-dimensional (2D) structures of the molecules were sketched in Hyperchem v7.3 software (HyperChem, 2002[<xref ref-type="bibr" rid="R20">20</xref>]) and pre-optimization was done using molecular mechanics force field (MM&#x2B;) procedure, and final geometry optimization was performed using semi-empirical (AM1) method with root mean square gradient of 0.01 kcal mol<sup>-1</sup>. A total of 3224 different molecular descriptors were calculated for each molecule using Dragon v5.5 package (Todeschini et al., 2010[<xref ref-type="bibr" rid="R41">41</xref>]). The constant or near constant variables were removed, and then, the collinear descriptors (i.e. r&#x3E;0.9) were removed. 
The remaining molecular descriptors were then passed to the variable selection tools to derive the most relevant subset of descriptors.</p></sec><sec><title>Principal Component Analysis (PCA)</title><p>The division of the dataset into training and test set is the most crucial step since the models are built based on the selected compounds. To divide the dataset into training and the test set, principal component analysis (PCA) (Abdi and Williams, 2010[<xref ref-type="bibr" rid="R1">1</xref>]) was used so as to split the dataset based on their chemical structure diversity. The compounds in the test set were selected considering the distribution in chemical structure diversity and also, for avoiding the fitting problem, a good distribution of biological activities for the selected compounds was considered. As a result of the PCA, 6 significant principal components (PCs) were extracted from the variables (PC<sub>1</sub>&#x3D;49.81 &#x25;, PC<sub>2</sub>&#x3D;22.09 &#x25;, PC<sub>3</sub>&#x3D;12.25 &#x25;, PC<sub>4</sub>&#x3D;7.10 &#x25;, PC<sub>5</sub>&#x3D;6.65 &#x25;, PC<sub>6</sub>&#x3D;3.10 &#x25;). PC<sub>1</sub> and PC<sub>2</sub> were selected for the division purpose since they covered the most variability in the dataset. The selection was first made based on the distribution of data points in PC<sub>1</sub> and PC<sub>2</sub>, and then the final candidates for the test set were chosen so that their biological activities were well distributed. </p></sec><sec><title>Variable selection technique</title><p>The selection of relevant descriptors for building the predictive model is also an important step in model construction. The final goal in this step is to find the most relevant descriptors which can be used to predict the biological activities with minimum error. In this contribution, we used two well-known variable selection methods including stepwise (SW) and genetic algorithm (GA). 
Stepwise regression builds a regression model in which the selection of predictive variables is done by an automatic procedure (Draper and Smith, 1981[<xref ref-type="bibr" rid="R12">12</xref>]) based on the F-test. The stepwise method follows the forward selection and backward elimination rule: forward selection begins with no variables in the model and tests whether the addition of each variable improves the model outcome, while backward elimination begins with all variables and tests whether the model is improved by omitting each of them (Draper and Smith, 1981[<xref ref-type="bibr" rid="R12">12</xref>]). In genetic algorithms, the initial step is creating a large number of randomly selected subsets of descriptors, termed chromosomes, in which each included variable is called a gene (Holland, 1975[<xref ref-type="bibr" rid="R18">18</xref>]; Pourbasheer et al., 2014[<xref ref-type="bibr" rid="R32">32</xref>][<xref ref-type="bibr" rid="R34">34</xref>]). Unlike the stepwise technique, the genetic algorithm does not present the overfitting issue, since it uses the correlation coefficient of leave-one-out cross-validation (Q<sup>2</sup><sub>LOO</sub>) as a fitness function, by which subsets of variables are evaluated for selection as the most relevant descriptors. Subsequently, the subsets with worse fitness are excluded, and then the remaining subsets are bred. Finally, mutation is carried out. The genetic algorithm technique was first developed by Leardi et al. (1992[<xref ref-type="bibr" rid="R23">23</xref>]). The genetic algorithm and stepwise methods used as selection tools were written in Matlab 6.5 program (Mathworks, 2005[<xref ref-type="bibr" rid="R29">29</xref>]).</p></sec></sec>
    <sec sec-type="discussion">
      <title>Results and Discussion</title><p>The total data set was separated into a training set of 59 compounds to develop the models and a test set of 13 compounds using PCA. The training and test sets are shown in Table 1<xref ref-type="fig" rid="T1">(Tab. 1)</xref>. After division of the dataset, the stepwise method was used to provide the most relevant descriptors for modeling purpose. The multiple linear regression method was then used to linearly correlate the descriptors selected by the stepwise technique with the activities on the basis of the training set compounds, and the model was then evaluated using the test set compounds. During the derivation of the model, 2 compounds belonging to the test set were detected as outliers and excluded from analyses (Table 1<xref ref-type="fig" rid="T1">(Tab. 1)</xref>). The derived linear equation based on SW-MLR is as follows:</p><p>pIC<sub>50</sub>&#x3D; 22.32 (&#xB1;3.511) - 4.397 (&#xB1;0.9607) EEig05x &#x2B; 2.673 (&#xB1;0.7931) GGI9 - 0.01958 (&#xB1;0.008726) RDF065m - 0.7414 (&#xB1;0.1620) Mor19m &#x2B; 49.53 (&#xB1;11.34) R3u&#x2B; &#x2B; 0.1809 (&#xB1;0.07231) C-028 (1)</p><p><italic>N</italic><sub>train</sub>&#x3D; 59, <italic>R</italic><sup>2</sup><sub>train</sub>&#x3D; 0.772, <italic>R</italic><sup>2</sup><sub>test</sub>&#x3D; 0.703, <italic>R</italic><sup>2</sup><sub>adj</sub>&#x3D; 0.745, <italic>F</italic><sub>train</sub>&#x3D; 29.284, <italic>F</italic><sub>test</sub>&#x3D; 0.9878, <italic>RMSE</italic><sub>train</sub>&#x3D; 0.238, <italic>RMSE</italic><sub>test</sub> &#x3D; 0.265, <italic>Q</italic><sup>2</sup><sub>LOO</sub>&#x3D;0.697, <italic>Q</italic><sup>2</sup><sub>LGO</sub>&#x3D; 0.720, <italic>Q</italic><sup>2</sup><sub>BOOT</sub>&#x3D; 0.712, CCC<sub>train</sub>&#x3D;0.871, CCC<sub>test</sub>&#x3D;0.781, r<sup>2</sup>m&#x3D;0.596, r<sup>2</sup>m <sub>average</sub>&#x3D;0.433, MAE<sub>train</sub>&#x3D;0.190, MAE<sub>test</sub>&#x3D; 0.192.</p><p>In the above equation, <italic>N</italic> is the number of training set compounds, 
<italic>R</italic><sup>2</sup> is the squared correlation coefficient, <italic>RMSE</italic> is the root mean square error, <italic>R</italic><sup>2</sup><sub>adj</sub> is adjusted <italic>R</italic><sup>2</sup>, <italic>Q</italic><sup>2</sup><sub>LOO</sub>, <italic>Q</italic><sup>2</sup><sub>LGO</sub> and <italic>Q</italic><sup>2</sup><sub>BOOT</sub> are the squared cross-validation coefficients for leave one out, leave group out and bootstrapping respectively, and <italic>F</italic> is the Fisher <italic>F</italic>-statistic. CCC is the concordance correlation coefficient and evaluates the degree to which pairs of observations fall on the 45&#xB0; line through the origin (Pourbasheer et al., 2014[<xref ref-type="bibr" rid="R35">35</xref>]). The r<sup>2</sup>m is the modified r<sup>2</sup> value and MAE is the mean absolute error. Since the developed model showed lower accuracy for the test set, the Golbraikh and Tropsha acceptable model criteria were employed to investigate the reliability of the derived model (Golbraikh and Tropsha, 2002[<xref ref-type="bibr" rid="R13">13</xref>]). 
Four conditions for accepting a model are as follows:</p><p><list list-type="order"><list-item><p>Q<sup>2</sup><sub>LOO</sub> &#x3E; 0.5</p></list-item><list-item><p>R<sup>2</sup><sub>test</sub> &#x3E; 0.6</p></list-item><list-item><p>(R<sup>2</sup> - R<sub>0</sub><sup>&#x27;2</sup>)&#x2F;R<sup>2</sup> &#x3C; 0.1 and 0.85 &#x3C; K&#x27; &#x3C; 1.15 or (R<sup>2</sup> - R<sub>0</sub><sup>2</sup>)&#x2F;R<sup>2</sup> &#x3C; 0.1 and 0.85 &#x3C; K &#x3C; 1.15</p></list-item><list-item><p>|R<sub>0</sub><sup>2</sup> - R<sub>0</sub><sup>&#x27;2</sup>| &#x3C; 0.3</p></list-item></list></p><p>where R is the correlation coefficient between the observed and predicted values; R<sub>0</sub><sup>2</sup> and R<sub>0</sub>&#x2032;<sup>2</sup> are the coefficients of determination for regressions through the origin (i.e. with an intercept of zero) of predicted versus observed and observed versus predicted values, respectively; K and K&#x2032; are the slopes of the corresponding regression lines through the origin. The results of this analysis are listed in Table 2<xref ref-type="fig" rid="T2">(Tab. 2)</xref>. As can be seen, the last condition for acceptance was not satisfied by the model derived with SW-MLR. Therefore, the genetic algorithm as a method for variable selection was applied to the same data set (i.e. training and test set selected based on PCA) for selecting the best set of molecular descriptors. The GA-MLR analysis led to a model with six descriptors. 
This linear model and its statistical parameters are derived as follows:</p><p>pIC<sub>50</sub>&#x3D;  36.97 (&#xB1;4.056) - 7.971 (&#xB1;0.9724) EEig05r &#x2B; 0.6368 (&#xB1;0.1662) GGI4 - 0.1752 (&#xB1;0.06418) SPAN - 0.5972 (&#xB1;0.1320) Mor19m &#x2B; 45.88 (&#xB1;13.05) R3u&#x2B; - 5.624 (&#xB1;1.617) R5p (2)</p><p><italic>N</italic><sub>train</sub>&#x3D; 59, <italic>R</italic><sup>2</sup><sub>train</sub>&#x3D; 0.792, <italic>R</italic><sup>2</sup><sub>test</sub>&#x3D; 0.713, <italic>R</italic><sup>2</sup><sub>adj</sub>&#x3D; 0.778, <italic>F</italic><sub>train</sub>&#x3D; 32.985, <italic>F</italic><sub>test</sub>&#x3D;1.3885, <italic>RMSE</italic><sub>train</sub>&#x3D; 0.227, <italic>RMSE</italic><sub>test</sub> &#x3D; 0.252, <italic>Q</italic><sup>2</sup><sub>LOO</sub>&#x3D; 0.737, <italic>Q</italic><sup>2</sup><sub>LGO</sub>&#x3D; 0.762, <italic>Q</italic><sup>2</sup><sub>BOOT</sub>&#x3D; 0.731, CCC<sub>train</sub>&#x3D;0.884, CCC<sub>test</sub>&#x3D;0.819, r<sup>2</sup>m&#x3D;0.666, r<sup>2</sup>m <sub>average</sub>&#x3D;0.533, MAE<sub>train</sub>&#x3D;0.188, MAE<sub>test</sub>&#x3D; 0.213.</p><p>The PCA results were shown in Figure 1<xref ref-type="fig" rid="F1">(Fig. 1)</xref>. PC<sub>1</sub>-PC<sub>2</sub> loadings plot using the six descriptors for the best model (GA-MLR) were shown in Figure 2<xref ref-type="fig" rid="F2">(Fig. 2)</xref>. In Figure 2<xref ref-type="fig" rid="F2">(Fig. 2)</xref>, for the loadings it is confirmed that the compounds with higher biological activity values, located on the left side which are presenting a large contribution of the R3u&#x2B; descriptor, situated on the same side in Figure 1<xref ref-type="fig" rid="F1">(Fig. 1)</xref>. On the other hand, compounds with lower biological activity values, on the right side, have more pronounced contributions from the other descriptors (mostly from R5p and EEig05r). Also it can be observed that the distribution of scores in Figure 1<xref ref-type="fig" rid="F1">(Fig. 
1)</xref> is denser in the upper right region, which indicates that most of the compounds in the data set have higher values for the descriptors with negative effects than for the descriptors with positive effects. Therefore, the selected PCs are the true representative of the molecular descriptors that can be encoded for understanding the correlation between chemical structures and biological activities.</p><p>The Golbraikh and Tropsha acceptable model criteria were employed for evaluating the prediction capability of the built GA-MLR model. The results are listed in Table 2<xref ref-type="fig" rid="T2">(Tab. 2)</xref>. As can be seen, all conditions were satisfied for GA-MLR and therefore, it was used as the main model for prediction purpose. The experimental and predicted activities based on this model are given in Table 1<xref ref-type="fig" rid="T1">(Tab. 1)</xref>. The plot of the predicted pIC<sub>50</sub> versus the experimental pIC<sub>50</sub> is demonstrated in Figure 3<xref ref-type="fig" rid="F3">(Fig. 3)</xref>. As can be seen from Table 1<xref ref-type="fig" rid="T1">(Tab. 1)</xref> and Figure 3<xref ref-type="fig" rid="F3">(Fig. 3)</xref>, the calculated activity values are in good agreement with experimental activity values.</p><p>The inter-correlation between the six selected descriptors was inspected by calculating their variance inflation factors (VIF), which are given in Table 3<xref ref-type="fig" rid="T3">(Tab. 3)</xref>. The VIF values were calculated as 1&#x2F;(1 - r<sup>2</sup>), where r<sup>2</sup> is the squared multiple correlation coefficient of one descriptor regressed on the remaining molecular descriptors. If VIF equals 1, then no inter-correlation exists for each variable; if VIF falls into the range between 1 and 5, the related model is acceptable; and if VIF is larger than 10, the related model is unstable and a recheck is necessary (Maryam et al., 2012[<xref ref-type="bibr" rid="R28">28</xref>]). 
As can be seen from the information given in Table 3<xref ref-type="fig" rid="T3">(Tab. 3)</xref>, most of the variables had VIF values of less than 5, indicating that the GA-MLR model has statistical significance. </p><p>The built GA-MLR model was validated using the leave-one-out and leave-group-out cross-validated correlation coefficients (Q<sup>2</sup><sub>LOO</sub> and Q<sup>2</sup><sub>LGO</sub>). The robustness of the GA-MLR model and its predictive ability were confirmed by the high Q<sup>2</sup><sub>BOOT</sub> value based on bootstrapping repeated 5000 times (Hadizadeh et al., 2013[<xref ref-type="bibr" rid="R15">15</xref>]). The results produced by the Q<sup>2</sup><sub>LOO</sub>, Q<sup>2</sup><sub>LGO</sub> and Q<sup>2</sup><sub>BOOT</sub> parameters along with other validation parameters showed the higher quality of the developed GA-MLR model. Therefore, this model can be used to predict the inhibition activity of the compounds. </p><p>The robustness of the QSAR model was further assessed by applying the Y-randomization test. The dependent variable vector (inhibitory activity) was shuffled randomly and the new QSAR models (after several repetitions) would be anticipated to have low R<sup>2</sup> and Q<sup>2</sup><sub>LOO</sub> values (Figure 4<xref ref-type="fig" rid="F4">(Fig. 4)</xref>) (Asadollahi et al., 2011[<xref ref-type="bibr" rid="R4">4</xref>]). As can be seen from Figure 4<xref ref-type="fig" rid="F4">(Fig. 4)</xref>, after shuffling the biological response of the compounds 200 times, the R<sup>2</sup> and Q<sup>2</sup><sub>LOO</sub> values of all the derived new models were lower than those obtained with the real response.</p><p>The Williams plot, the plot of the standardized residuals versus the leverage (h), is used to visualize the applicability domain (AD) of QSAR models (Vahdani and Bayat, 2011[<xref ref-type="bibr" rid="R42">42</xref>]). From the Williams plot (Figure 5<xref ref-type="fig" rid="F5">(Fig. 5)</xref>), it is obvious that only two compounds (No. 1 and No. 
6 belonging to the training set) have the leverage higher than the warning <italic>h</italic>&#x2217; value of 0.356, thus they can be considered as structural outliers. From Figure 5<xref ref-type="fig" rid="F5">(Fig. 5)</xref>, it is obvious that the standardized residuals observed for all the compounds in the training and test sets are smaller than three standard deviation units (3&#x3B4;). Thus, the generated model is acceptable for prediction purpose.</p><sec><title>Interpretation of descriptors</title><p>By interpreting the descriptors contained in GA-MLR model, some new insights can be obtained which can be helpful for understanding the correlation of chemical structure with biological activities.</p><p>The first selected descriptor is Eigenvalue 05 from edge adj. matrix weighted by resonance integrals (EEig05r) which belongs to the edge adjacency indices and encodes the connectivity between graph edges (Todeschini and Consonni, 2000[<xref ref-type="bibr" rid="R39">39</xref>]). Resonance is a kind of energy stabilizing because of its delocalization effects over electrons in a bond network. As it can be seen, this descriptor represented negative effect in derived GA-MLR model encoding that increasing in the value of EEig05r by increasing the capability of the molecules (the functional groups that provide resonance in bonding with other part of bonding network) for providing more resonances would decrease the pIC<sub>50</sub> of compounds.</p><p>GGI4 is the second selected descriptor which is representing the topological charge index of order 4 (Todeschini and Consonni, 2008[<xref ref-type="bibr" rid="R38">38</xref>]). Topological charge indices are evaluating the charge transfer between atoms. These types of descriptors were first introduced by Galvez. In this concept a matrix called <bold>M</bold> is obtained by multiplying the adjacency matrix <bold>A</bold> by the reciprocal square distance matrix (<bold>D</bold><bold><sup>-2</sup></bold>). 
However to prevent the division by zero, the diagonal entries of the distance matrix remain the same; the obtained matrix <bold>M</bold> called the Galvez matrix is then the unsymmetrical matrix (<italic>A&#xD7;A</italic>) , and A is the number of atoms in matrix. Based on the derived <bold>M</bold> matrix the charge term matrix (<bold>CT</bold><bold><sub>ij</sub></bold>) which is the charge transfer between the pair of considered vertices can be obtained as follows:</p><p><inline-graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="EXCLI-15-38-i-001" ></inline-graphic></p><p>where <italic>m</italic><italic><sub>ij</sub></italic> is elements of matrix M, <italic>&#x3B4;</italic><italic><sub>i</sub></italic> is vertex degree of <italic>i</italic> atom. <italic>CT</italic><italic><sub>ij</sub></italic> is also representing the net charge transfer between atom <italic>j</italic> and <italic>i</italic>. Hence, for each path length <italic>k</italic>, a topological charge index termed as <italic>G</italic><italic><sub>k</sub></italic> can be obtained as follows:</p><p><inline-graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="EXCLI-15-38-i-002" ></inline-graphic></p><p><italic>d</italic><italic><sub>ij</sub></italic> is elements of distance matrix. Therefore, the <italic>G</italic><italic><sub>k</sub></italic> is the half-sum of all charge and indicate the total charge transfer between atoms placed at topological distance k. The positive sign of this descriptor in derived linear equation indicates that increasing the charge transfer between the pair of atoms would result in increase of the pIC<sub>50</sub> values, respectively. 
</p><p>The third selected descriptor (SPAN) is span R, which belongs to the geometrical size indices and represents the radius of the smallest sphere, centered on the center of mass, enclosing all atoms of a molecule (Todeschini and Consonni, 2009[<xref ref-type="bibr" rid="R40">40</xref>]), and can be calculated as follows:</p><p><inline-graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="EXCLI-15-38-i-003" ></inline-graphic></p><p>where <italic>r</italic><italic><sub>i</sub></italic> is the distance of the <italic>i</italic>th atom from the center of mass. Since this descriptor has a negative sign in the derived linear model, increasing the size of the molecules by increasing the distance of specific moieties in the molecules would result in a decrease of the pIC<sub>50</sub> values.</p><p>Mor19m (3D-MoRSE-signal 19&#x2F;weighted by atomic masses), the fourth selected descriptor of the GA-MLR equation, belongs to the 3D-MoRSE descriptors. This group of descriptors is a subgroup of the geometrical descriptors (Todeschini and Consonni, 2000[<xref ref-type="bibr" rid="R39">39</xref>]). The values of this group of descriptors depend on the 3D structure of the molecule. 3D-MoRSE descriptors (3D-Molecule Representation of Structures based on electron diffraction) are based on the idea of obtaining information from the 3D atomic coordinates by the transform used in electron diffraction studies for preparing theoretical scattering curves (Soltzberg and Wilkins, 1977[<xref ref-type="bibr" rid="R37">37</xref>]). This can be performed by infrared spectra simulation using a generalized scattering function. Mor19m is associated with a negative regression coefficient, indicating that a decrease in the corresponding 3D-MoRSE signal at a scanning distance of 19 would result in an increase of the pIC<sub>50</sub> value.</p><p>The fifth and sixth descriptors (R3u&#x2B; and R5p, respectively) belong to the GETAWAY R-indices descriptors. 
GETAWAY stands for GEometry, Topology and Atom-Weights AssemblY. These descriptors are geometrical descriptors which provide good information on the position of substituents and fragments in the molecule (Consonni et al., 2002[<xref ref-type="bibr" rid="R9">9</xref>]). In addition, they can carry good information on molecular size and shape. R3u&#x2B; (R maximal autocorrelation of lag 3&#x2F;unweighted) is related to the maximum steric contributions to molecular shape at a topological distance of 3 (Hall and Kier, 1995[<xref ref-type="bibr" rid="R16">16</xref>]; Todeschini and Consonni, 2000[<xref ref-type="bibr" rid="R39">39</xref>]). Since it has a positive sign in the derived linear equation, increasing the value of this descriptor will increase the activity (pIC<sub>50</sub>). On the other hand, the other type of GETAWAY R-indices (i.e. R5p), which is R maximal autocorrelation of lag 5&#x2F;weighted by polarizability, would cause a decrease in biological activity (pIC<sub>50</sub>) due to its negative sign in the obtained linear equation. Therefore, to obtain a good biological activity, the polarizability of the molecule should be decreased.</p><p>To conclude, it was observed that a greater capability for resonance in the molecular graph is unfavorable; since most polar functional groups can give rise to such resonance, the introduction of more polar groups should be avoided, in view of the negative effects of the EEig05r and R5p descriptors. It was also seen that a greater distance of substituents from the center of mass has a negative effect on biological activity. However, good biological activity can be achieved if the charge transfer in the bonding network and the steric contributions to molecular shape increase.</p></sec></sec>
    <sec sec-type="conclusions">
      <title>Conclusion</title><p>A robust QSAR model was developed based on PCA-GA-MLR for a dataset consisting of 72 HCV NS5B polymerase inhibitors. The derived models were validated based on several validation techniques, and it was observed that the GA-MLR model is more accurate than the derived SW-MLR model. Based on the obtained results of GA-MLR, it was observed that a greater capability for resonance in the molecular graph is unfavorable; since most polar functional groups can give rise to such resonance, the introduction of more polar groups should be avoided, in view of the negative effects of the EEig05r and R5p descriptors. It was also seen that a greater distance of substituents from the center of mass has a negative impact on biological activity. However, good biological activity can be achieved if the charge transfer in the bonding network and the steric contributions to molecular shape increase. In this work, the proposed models could identify and provide better insights about the chemical structure requirements for increasing the pIC<sub>50</sub> values.</p></sec>
  </body>
  <back>
    <ref-list>
      <ref id="R1">
        <label>1</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Abdi</surname>
              <given-names>H</given-names>
            </name>
            <name>
              <surname>Williams</surname>
              <given-names>LJ</given-names>
            </name>
          </person-group>
          <article-title>Principal component analysis</article-title>
          <source>Wiley Interdisciplinary Reviews: Comput Stat</source>
          <year>2010</year>
          <volume>2</volume>
          <fpage>433</fpage>
          <lpage>459</lpage>
        </citation>
      </ref>
      <ref id="R2">
        <label>2</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Alter</surname>
              <given-names>MJ</given-names>
            </name>
            <name>
              <surname>Kruszon-Moran</surname>
              <given-names>D</given-names>
            </name>
            <name>
              <surname>Nainan</surname>
              <given-names>OV</given-names>
            </name>
            <name>
              <surname>McQuillan</surname>
              <given-names>GM</given-names>
            </name>
            <name>
              <surname>Gao</surname>
              <given-names>F</given-names>
            </name>
            <name>
              <surname>Moyer</surname>
              <given-names>LA</given-names>
            </name>
            <etal />
          </person-group>
          <article-title>The prevalence of hepatitis C virus infection in the United States, 1988 through 1994</article-title>
          <source>N Engl J Med</source>
          <year>1999</year>
          <volume>341</volume>
          <fpage>556</fpage>
          <lpage>562</lpage>
        </citation>
      </ref>
      <ref id="R3">
        <label>3</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Alter</surname>
              <given-names>MJ</given-names>
            </name>
            <name>
              <surname>Margolis</surname>
              <given-names>HS</given-names>
            </name>
            <name>
              <surname>Krawczynski</surname>
              <given-names>K</given-names>
            </name>
            <name>
              <surname>Judson</surname>
              <given-names>FN</given-names>
            </name>
            <name>
              <surname>Mares</surname>
              <given-names>A</given-names>
            </name>
            <name>
              <surname>Alexander</surname>
              <given-names>WJ</given-names>
            </name>
            <etal />
          </person-group>
          <article-title>The natural history of community-acquired hepatitis C in the United States</article-title>
          <source>N Engl J Med</source>
          <year>1992</year>
          <volume>327</volume>
          <fpage>1899</fpage>
          <lpage>1905</lpage>
        </citation>
      </ref>
      <ref id="R4">
        <label>4</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Asadollahi</surname>
              <given-names>T</given-names>
            </name>
            <name>
              <surname>Dadfarnia</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Shabani</surname>
              <given-names>AMH</given-names>
            </name>
            <name>
              <surname>Ghasemi</surname>
              <given-names>JB</given-names>
            </name>
            <name>
              <surname>Sarkhosh</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>QSAR models for CXCR2 receptor antagonists based on the genetic algorithm for data preprocessing prior to application of the PLS linear regression method and design of the new compounds using in silico virtual screening</article-title>
          <source>Molecules</source>
          <year>2011</year>
          <volume>16</volume>
          <fpage>1928</fpage>
          <lpage>1955</lpage>
        </citation>
      </ref>
      <ref id="R5">
        <label>5</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Beaulieu</surname>
              <given-names>PL</given-names>
            </name>
            <name>
              <surname>Chabot</surname>
              <given-names>C</given-names>
            </name>
            <name>
              <surname>Duan</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Garneau</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>Gillard</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Jolicoeur</surname>
              <given-names>E</given-names>
            </name>
            <etal />
          </person-group>
          <article-title>Indole 5-carboxamide Thumb Pocket I inhibitors of HCV NS5B polymerase with nanomolar potency in cell-based subgenomic replicons (part 2): Central amino acid linker and right-hand-side SAR studies</article-title>
          <source>Bioorg Med Chem Lett</source>
          <year>2011</year>
          <volume>21</volume>
          <fpage>3664</fpage>
          <lpage>3670</lpage>
        </citation>
      </ref>
      <ref id="R6">
        <label>6</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Beaulieu</surname>
              <given-names>PL</given-names>
            </name>
            <name>
              <surname>Gillard</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Jolicoeur</surname>
              <given-names>E</given-names>
            </name>
            <name>
              <surname>Duan</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Garneau</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>Kukolj</surname>
              <given-names>G</given-names>
            </name>
            <etal />
          </person-group>
          <article-title>From benzimidazole to indole-5-carboxamide Thumb Pocket I inhibitors of HCV NS5B polymerase. Part 1: Indole C-2 SAR and discovery of diamide derivatives with nanomolar potency in cell-based subgenomic replicons</article-title>
          <source>Bioorg Med Chem Lett</source>
          <year>2011</year>
          <volume>21</volume>
          <fpage>3658</fpage>
          <lpage>3663</lpage>
        </citation>
      </ref>
      <ref id="R7">
        <label>7</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Bressanelli</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Tomei</surname>
              <given-names>L</given-names>
            </name>
            <name>
              <surname>Rey</surname>
              <given-names>FA</given-names>
            </name>
            <name>
              <surname>De Francesco</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Structural analysis of the hepatitis C virus RNA polymerase in complex with ribonucleotides</article-title>
          <source>J Virol</source>
          <year>2002</year>
          <volume>76</volume>
          <fpage>3482</fpage>
          <lpage>3492</lpage>
        </citation>
      </ref>
      <ref id="R8">
        <label>8</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Choo</surname>
              <given-names>QL</given-names>
            </name>
            <name>
              <surname>Kuo</surname>
              <given-names>G</given-names>
            </name>
            <name>
              <surname>Weiner</surname>
              <given-names>AJ</given-names>
            </name>
            <name>
              <surname>Overby</surname>
              <given-names>LR</given-names>
            </name>
            <name>
              <surname>Bradley</surname>
              <given-names>DW</given-names>
            </name>
            <name>
              <surname>Houghton</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Isolation of a cDNA clone derived from a blood-borne non-A, non-B viral hepatitis genome</article-title>
          <source>Science</source>
          <year>1989</year>
          <volume>244</volume>
          <fpage>359</fpage>
          <lpage>362</lpage>
        </citation>
      </ref>
      <ref id="R9">
        <label>9</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Consonni</surname>
              <given-names>V</given-names>
            </name>
            <name>
              <surname>Todeschini</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Pavan</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>Gramatica</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Structure&#x2F;response correlations and similarity&#x2F;diversity analysis by GETAWAY descriptors. 2. Application of the novel 3D molecular descriptors to QSAR&#x2F;QSPR studies</article-title>
          <source>J Chem Inf Model</source>
          <year>2002</year>
          <volume>42</volume>
          <fpage>693</fpage>
          <lpage>705</lpage>
        </citation>
      </ref>
      <ref id="R10">
        <label>10</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Cornberg</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>H&#xFC;ppe</surname>
              <given-names>D</given-names>
            </name>
            <name>
              <surname>Wiegand</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Felten</surname>
              <given-names>G</given-names>
            </name>
            <name>
              <surname>Wedemeyer</surname>
              <given-names>H</given-names>
            </name>
            <name>
              <surname>Manns</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Treatment of chronic hepatitis C with PEG-interferon alpha-2b and ribavirin: 24 weeks of therapy are sufficient for HCV genotype 2 and 3</article-title>
          <source>Z Gastroenterol</source>
          <year>2003</year>
          <volume>41</volume>
          <fpage>517</fpage>
          <lpage>522</lpage>
        </citation>
      </ref>
      <ref id="R11">
        <label>11</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Dillon</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>What is the best treatment&#x3F;</article-title>
          <source>J Viral Hepat</source>
          <year>2004</year>
          <volume>11</volume>
          <fpage>23</fpage>
          <lpage>27</lpage>
        </citation>
      </ref>
      <ref id="R12">
        <label>12</label>
        <citation citation-type="book">
          <person-group person-group-type="author">
            <name>
              <surname>Draper</surname>
              <given-names>NR</given-names>
            </name>
            <name>
              <surname>Smith</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>Applied regression analysis</source>
          <year>1981</year>
          <edition>2nd</edition>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>John Wiley &#x26; Sons, Inc</publisher-name>
        </citation>
      </ref>
      <ref id="R13">
        <label>13</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Golbraikh</surname>
              <given-names>A</given-names>
            </name>
            <name>
              <surname>Tropsha</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Beware of q<sup>2</sup>&#x21;</article-title>
          <source>J Mol Graph Model</source>
          <year>2002</year>
          <volume>20</volume>
          <fpage>269</fpage>
          <lpage>276</lpage>
        </citation>
      </ref>
      <ref id="R14">
        <label>14</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Grakoui</surname>
              <given-names>A</given-names>
            </name>
            <name>
              <surname>Wychowski</surname>
              <given-names>C</given-names>
            </name>
            <name>
              <surname>Lin</surname>
              <given-names>C</given-names>
            </name>
            <name>
              <surname>Feinstone</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Rice</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Expression and identification of hepatitis C virus polyprotein cleavage products</article-title>
          <source>J Virol</source>
          <year>1993</year>
          <volume>67</volume>
          <fpage>1385</fpage>
          <lpage>1395</lpage>
        </citation>
      </ref>
      <ref id="R15">
        <label>15</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Hadizadeh</surname>
              <given-names>F</given-names>
            </name>
            <name>
              <surname>Vahdani</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Jafarpour</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Quantitative structure-activity relationship studies of 4-imidazolyl-1,4-dihydropyridines as calcium channel blockers</article-title>
          <source>Iran J Basic Med Sci</source>
          <year>2013</year>
          <volume>16</volume>
          <fpage>910</fpage>
          <lpage>916</lpage>
        </citation>
      </ref>
      <ref id="R16">
        <label>16</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Hall</surname>
              <given-names>LH</given-names>
            </name>
            <name>
              <surname>Kier</surname>
              <given-names>LB</given-names>
            </name>
          </person-group>
          <article-title>Electrotopological state indices for atom types: a novel combination of electronic, topological, and valence state information</article-title>
          <source>J Chem Inf Comput Sci</source>
          <year>1995</year>
          <volume>35</volume>
          <fpage>1039</fpage>
          <lpage>1045</lpage>
        </citation>
      </ref>
      <ref id="R17">
        <label>17</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Hijikata</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>Kato</surname>
              <given-names>N</given-names>
            </name>
            <name>
              <surname>Ootsuyama</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Nakagawa</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>Shimotohno</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Gene mapping of the putative structural region of the hepatitis C virus genome by in vitro processing analysis</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>1991</year>
          <volume>88</volume>
          <fpage>5547</fpage>
          <lpage>5551</lpage>
        </citation>
      </ref>
      <ref id="R18">
        <label>18</label>
        <citation citation-type="book">
          <person-group person-group-type="author">
            <name>
              <surname>Holland</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Adaptation in natural and artificial systems</source>
          <year>1975</year>
          <publisher-loc>Ann Arbor, MI</publisher-loc>
          <publisher-name>University of Michigan Press</publisher-name>
        </citation>
      </ref>
      <ref id="R19">
        <label>19</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>H&#xFC;gle</surname>
              <given-names>T</given-names>
            </name>
            <name>
              <surname>Cerny</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Current therapy and new molecular approaches to antiviral treatment and prevention of hepatitis C</article-title>
          <source>Rev Med Virol</source>
          <year>2003</year>
          <volume>13</volume>
          <fpage>361</fpage>
          <lpage>371</lpage>
        </citation>
      </ref>
      <ref id="R20">
        <label>20</label>
        <citation citation-type="book">
          <collab>HyperChem</collab>
          <source>Molecular modeling system</source>
          <year>2002</year>
          <edition>7.03</edition>
          <publisher-loc>Gainesville, FL</publisher-loc>
          <publisher-name>Hypercube, Inc</publisher-name>
        </citation>
      </ref>
      <ref id="R21">
        <label>21</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Karbakhsh</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Sabet</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Application of different chemometric tools in QSAR study of azolo-adamantanes against influenza A virus</article-title>
          <source>Res Pharm Sci</source>
          <year>2011</year>
          <volume>6</volume>
          <fpage>23</fpage>
          <lpage>33</lpage>
        </citation>
      </ref>
      <ref id="R22">
        <label>22</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Kaushik-Basu</surname>
              <given-names>N</given-names>
            </name>
            <name>
              <surname>Bopda-Waffo</surname>
              <given-names>A</given-names>
            </name>
            <name>
              <surname>Talele</surname>
              <given-names>TT</given-names>
            </name>
            <name>
              <surname>Basu</surname>
              <given-names>A</given-names>
            </name>
            <name>
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name>
              <surname>Kucukguzel</surname>
              <given-names>SG</given-names>
            </name>
          </person-group>
          <article-title>4-Thiazolidinones: a novel class of hepatitis C virus NS5B polymerase inhibitors</article-title>
          <source>Front Biosci</source>
          <year>2007</year>
          <volume>13</volume>
          <fpage>3857</fpage>
          <lpage>3868</lpage>
        </citation>
      </ref>
      <ref id="R23">
        <label>23</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Leardi</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Boggia</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Terrile</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Genetic algorithms as a strategy for feature selection</article-title>
          <source>J Chemometr</source>
          <year>1992</year>
          <volume>6</volume>
          <fpage>267</fpage>
          <lpage>281</lpage>
        </citation>
      </ref>
      <ref id="R24">
        <label>24</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Leyssen</surname>
              <given-names>P</given-names>
            </name>
            <name>
              <surname>De Clercq</surname>
              <given-names>E</given-names>
            </name>
            <name>
              <surname>Neyts</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Perspectives for the treatment of infections with flaviviridae</article-title>
          <source>Clin Microbiol Rev</source>
          <year>2000</year>
          <volume>13</volume>
          <fpage>67</fpage>
          <lpage>82</lpage>
        </citation>
      </ref>
      <ref id="R25">
        <label>25</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Lei</surname>
              <given-names>B</given-names>
            </name>
            <name>
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name>
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Yao</surname>
              <given-names>X</given-names>
            </name>
            <name>
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <etal />
          </person-group>
          <article-title>QSAR study of malonyl&#x2010;CoA decarboxylase inhibitors using GA&#x2010;MLR and a new strategy of consensus modeling</article-title>
          <source>J Comput Chem</source>
          <year>2008</year>
          <volume>29</volume>
          <fpage>2636</fpage>
          <lpage>2647</lpage>
        </citation>
      </ref>
      <ref id="R26">
        <label>26</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Lohmann</surname>
              <given-names>V</given-names>
            </name>
            <name>
              <surname>Koch</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Bartenschlager</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Processing pathways of the hepatitis C virus proteins</article-title>
          <source>J Hepatol</source>
          <year>1995</year>
          <volume>24</volume>
          <fpage>11</fpage>
          <lpage>19</lpage>
        </citation>
      </ref>
      <ref id="R27">
        <label>27</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Love</surname>
              <given-names>RA</given-names>
            </name>
            <name>
              <surname>Parge</surname>
              <given-names>HE</given-names>
            </name>
            <name>
              <surname>Yu</surname>
              <given-names>X</given-names>
            </name>
            <name>
              <surname>Hickey</surname>
              <given-names>MJ</given-names>
            </name>
            <name>
              <surname>Diehl</surname>
              <given-names>W</given-names>
            </name>
            <name>
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <etal />
          </person-group>
          <article-title>Crystallographic identification of a noncompetitive inhibitor binding site on the hepatitis C virus NS5B RNA polymerase enzyme</article-title>
          <source>J Virol</source>
          <year>2003</year>
          <volume>77</volume>
          <fpage>7575</fpage>
          <lpage>7581</lpage>
        </citation>
      </ref>
      <ref id="R28">
        <label>28</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Maryam</surname>
              <given-names>A</given-names>
            </name>
            <name>
              <surname>Mahmoud</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Mehdi</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>QSAR study on the histamine (H3) receptor antagonists using the genetic algorithm: Multi parameter linear regression</article-title>
          <source>J Serb Chem Soc</source>
          <year>2012</year>
          <volume>77</volume>
          <fpage>639</fpage>
          <lpage>650</lpage>
        </citation>
      </ref>
      <ref id="R29">
        <label>29</label>
        <citation citation-type="book">
          <person-group person-group-type="author">
            <name>
              <surname>Mathworks</surname>
            </name>
          </person-group>
          <source>Genetic algorithm and direct search toolbox. User&#x27;s guide</source>
          <year>2005</year>
          <publisher-loc>Natick, MA</publisher-loc>
          <publisher-name>The Mathworks Inc</publisher-name>
        </citation>
      </ref>
      <ref id="R30">
        <label>30</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Niazi</surname>
              <given-names>A</given-names>
            </name>
            <name>
              <surname>Bozorghi</surname>
              <given-names>SJ</given-names>
            </name>
            <name>
              <surname>Shargh</surname>
              <given-names>DN</given-names>
            </name>
          </person-group>
          <article-title>Prediction of acidity constants of thiazolidine-4-carboxylic acid derivatives using ab initio and genetic algorithm-partial least squares</article-title>
          <source>Turk J Chem</source>
          <year>2006</year>
          <volume>30</volume>
          <fpage>619</fpage>
          <lpage>628</lpage>
        </citation>
      </ref>
      <ref id="R31">
        <label>31</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Noorizadeh</surname>
              <given-names>H</given-names>
            </name>
            <name>
              <surname>Farmany</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Theoretical prediction for the half wave reduction potential of organic molecules</article-title>
          <source>Russ J Electrochem</source>
          <year>2014</year>
          <volume>50</volume>
          <fpage>579</fpage>
          <lpage>586</lpage>
        </citation>
      </ref>
      <ref id="R32">
        <label>32</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Pourbasheer</surname>
              <given-names>E</given-names>
            </name>
            <name>
              <surname>Aalizadeh</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Ganjali</surname>
              <given-names>M</given-names>
            </name>
            <name>
              <surname>Norouzi</surname>
              <given-names>P</given-names>
            </name>
            <name>
              <surname>Banaei</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>QSAR study of mGlu5 inhibitors by genetic algorithm-multiple linear regressions</article-title>
          <source>Med Chem Res</source>
          <year>2014</year>
          <volume>23</volume>
          <fpage>3082</fpage>
          <lpage>3091</lpage>
        </citation>
      </ref>
      <ref id="R33">
        <label>33</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Pourbasheer</surname>
              <given-names>E</given-names>
            </name>
            <name>
              <surname>Aalizadeh</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Ganjali</surname>
              <given-names>MR</given-names>
            </name>
            <name>
              <surname>Norouzi</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>QSAR study of IKK&#x3B2; inhibitors by the genetic algorithm: Multiple linear regressions</article-title>
          <source>Med Chem Res</source>
          <year>2014</year>
          <volume>23</volume>
          <fpage>57</fpage>
          <lpage>66</lpage>
        </citation>
      </ref>
      <ref id="R34">
        <label>34</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Pourbasheer</surname>
              <given-names>E</given-names>
            </name>
            <name>
              <surname>Aalizadeh</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Ganjali</surname>
              <given-names>MR</given-names>
            </name>
            <name>
              <surname>Norouzi</surname>
              <given-names>P</given-names>
            </name>
            <name>
              <surname>Shadmanesh</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Methenitis</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>QSAR study of Nav1.7 antagonists by multiple linear regression method based on genetic algorithm (GA-MLR)</article-title>
          <source>Med Chem Res</source>
          <year>2014</year>
          <volume>23</volume>
          <fpage>2264</fpage>
          <lpage>2276</lpage>
        </citation>
      </ref>
      <ref id="R35">
        <label>35</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Pourbasheer</surname>
              <given-names>E</given-names>
            </name>
            <name>
              <surname>Aalizadeh</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Shokouhi Tabar</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Ganjali</surname>
              <given-names>MR</given-names>
            </name>
            <name>
              <surname>Norouzi</surname>
              <given-names>P</given-names>
            </name>
            <name>
              <surname>Shadmanesh</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>2D and 3D quantitative structure&#x2013;activity relationship study of hepatitis C virus NS5B Polymerase inhibitors by comparative molecular field analysis and comparative molecular similarity indices analysis methods</article-title>
          <source>J Chem Inf Model</source>
          <year>2014</year>
          <volume>54</volume>
          <fpage>2902</fpage>
          <lpage>2914</lpage>
        </citation>
      </ref>
      <ref id="R36">
        <label>36</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Rathod</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>Antifungal and antibacterial activities of imidazolylpyrimidines derivatives and their QSAR studies under conventional and microwave-assisted</article-title>
          <source>Int J PharmTech Res</source>
          <year>2011</year>
          <volume>3</volume>
          <fpage>1942</fpage>
          <lpage>1951</lpage>
        </citation>
      </ref>
      <ref id="R37">
        <label>37</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Soltzberg</surname>
              <given-names>LJ</given-names>
            </name>
            <name>
              <surname>Wilkins</surname>
              <given-names>CL</given-names>
            </name>
          </person-group>
          <article-title>Molecular transforms: a potential tool for structure-activity studies</article-title>
          <source>J Am Chem Soc</source>
          <year>1977</year>
          <volume>99</volume>
          <fpage>439</fpage>
          <lpage>443</lpage>
        </citation>
      </ref>
      <ref id="R38">
        <label>38</label>
        <citation citation-type="book">
          <person-group person-group-type="author">
            <name>
              <surname>Todeschini</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Consonni</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <source>Handbook of molecular descriptors</source>
          <year>2008</year>
          <publisher-loc>Weinheim</publisher-loc>
          <publisher-name>Wiley-VCH</publisher-name>
          <fpage>445</fpage>
        </citation>
      </ref>
      <ref id="R39">
        <label>39</label>
        <citation citation-type="book">
          <person-group person-group-type="author">
            <name>
              <surname>Todeschini</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Consonni</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <source>Handbook of molecular descriptors</source>
          <year>2000</year>
          <publisher-loc>Weinheim</publisher-loc>
          <publisher-name>Wiley-VCH</publisher-name>
        </citation>
      </ref>
      <ref id="R40">
        <label>40</label>
        <citation citation-type="book">
          <person-group person-group-type="author">
            <name>
              <surname>Todeschini</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Consonni</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <source>Molecular descriptors for chemoinformatics (2 volumes)</source>
          <year>2009</year>
          <publisher-loc>Weinheim</publisher-loc>
          <publisher-name>Wiley-VCH</publisher-name>
        </citation>
      </ref>
      <ref id="R41">
        <label>41</label>
        <citation citation-type="book">
          <person-group person-group-type="author">
            <name>
              <surname>Todeschini</surname>
              <given-names>R</given-names>
            </name>
            <name>
              <surname>Consonni</surname>
              <given-names>V</given-names>
            </name>
            <name>
              <surname>Mauri</surname>
              <given-names>A</given-names>
            </name>
            <name>
              <surname>Pavan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>DRAGON, software for the calculation of molecular descriptors, version 5.3</source>
          <year>2010</year>
          <publisher-loc>Milan, Italy</publisher-loc>
          <publisher-name>Talete srl</publisher-name>
        </citation>
      </ref>
      <ref id="R42">
        <label>42</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Vahdani</surname>
              <given-names>S</given-names>
            </name>
            <name>
              <surname>Bayat</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>A Quantitative Structure-Activity Relationship (QSAR) Study of anti-cancer drugs</article-title>
          <source>Der Chemica Sinica</source>
          <year>2011</year>
          <volume>2</volume>
          <fpage>235</fpage>
          <lpage>242</lpage>
        </citation>
      </ref>
      <ref id="R43">
        <label>43</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Walker</surname>
              <given-names>MP</given-names>
            </name>
            <name>
              <surname>Appleby</surname>
              <given-names>TC</given-names>
            </name>
            <name>
              <surname>Zhong</surname>
              <given-names>W</given-names>
            </name>
            <name>
              <surname>Lau</surname>
              <given-names>J</given-names>
            </name>
            <name>
              <surname>Hong</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Hepatitis C virus therapies: current treatments, targets and future perspectives</article-title>
          <source>Antivir Chem Chemother</source>
          <year>2003</year>
          <volume>14</volume>
          <fpage>1</fpage>
          <lpage>22</lpage>
        </citation>
      </ref>
      <ref id="R44">
        <label>44</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Wang</surname>
              <given-names>QM</given-names>
            </name>
            <name>
              <surname>Heinz</surname>
              <given-names>BA</given-names>
            </name>
          </person-group>
          <article-title>Recent advances in prevention and treatment of hepatitis C virus infections</article-title>
          <source>Prog Drug Res</source>
          <year>2000</year>
          <volume>55</volume>
          <fpage>1</fpage>
          <lpage>32</lpage>
        </citation>
      </ref>
      <ref id="R45">
        <label>45</label>
        <citation citation-type="journal">
          <person-group>
            <name>
              <surname>Wasley</surname>
              <given-names>A</given-names>
            </name>
            <name>
              <surname>Alter</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology of hepatitis C: geographic differences and temporal trends</article-title>
          <source>Semin Liver Dis</source>
          <year>2000</year>
          <volume>20</volume>
          <fpage>1</fpage>
          <lpage>16</lpage>
        </citation>
      </ref>
    </ref-list>
  </back>
  <floats-wrap>
    <fig id="T1" position="float">
      <label>Table 1</label>
      <caption><title>Chemical structures and the corresponding observed and predicted pIC<sub>50</sub> values by GA-MLR method</title></caption>
      <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="EXCLI-15-38-t-001" />
    </fig>
    <fig id="T2" position="float">
      <label>Table 2</label>
      <caption><title>Golbraikh and Tropsha acceptable model criteria for SW-MLR and GA-MLR</title></caption>
      <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="EXCLI-15-38-t-002" />
    </fig>
    <fig id="T3" position="float">
      <label>Table 3</label>
      <caption><title>Correlation coefficient matrix of the selected descriptors with their VIF values</title></caption>
      <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="EXCLI-15-38-t-003" />
    </fig>
    <fig id="F1" position="float">
      <label>Figure 1</label>
      <caption><title>Principal component analysis with PC<sub>1</sub> and PC<sub>2</sub> with test set for GA-MLR result</title></caption>
      <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="EXCLI-15-38-g-001" />
    </fig>
    <fig id="F2" position="float">
      <label>Figure 2</label>
      <caption><title>PC<sub>1</sub>-PC<sub>2</sub> loadings plot using the six descriptors for the best model (GA-MLR)</title></caption>
      <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="EXCLI-15-38-g-002" />
    </fig>
    <fig id="F3" position="float">
      <label>Figure 3</label>
      <caption><title>The predicted pIC<sub>50</sub> values by the GA-MLR modeling vs. the experimental pIC<sub>50</sub> values</title></caption>
      <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="EXCLI-15-38-g-003" />
    </fig>
    <fig id="F4" position="float">
      <label>Figure 4</label>
      <caption><title><italic>R</italic><sup>2</sup><sub>train</sub> and <italic>Q</italic><sup>2</sup><sub>LOO</sub> values after several Y-randomization tests for GA-MLR</title></caption>
      <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="EXCLI-15-38-g-004" />
    </fig>
    <fig id="F5" position="float">
      <label>Figure 5</label>
      <caption><title>The Williams plot for the predictive GA-MLR model</title></caption>
      <graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="EXCLI-15-38-g-005" />
    </fig>
  </floats-wrap>
</article>