<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.1">
   <front>
      <journal-meta>
         <journal-id journal-id-type="publisher-id">peerj-cs</journal-id>
         <journal-id journal-id-type="pmc">peerj-cs</journal-id>
         <journal-id journal-id-type="nlm-ta">PeerJ Comput. Sci.</journal-id>
         <journal-title-group>
            <journal-title>PeerJ Computer Science</journal-title>
            <abbrev-journal-title abbrev-type="publisher">PeerJ Comput. Sci.</abbrev-journal-title>
         </journal-title-group>
         <issn pub-type="epub">2376-5992</issn>
         <publisher>
            <publisher-name>PeerJ Inc.</publisher-name>
            <publisher-loc>San Diego, USA</publisher-loc>
         </publisher>
      </journal-meta>
      <article-meta>
         <article-id pub-id-type="publisher-id">cs-1583</article-id>
         <article-id pub-id-type="doi">10.7717/peerj-cs.1583</article-id>
         <article-categories>
            <subj-group subj-group-type="categories">
               <subject>Artificial Intelligence</subject>
               <subject>Computer Vision</subject>
               <subject>Neural Networks</subject>
            </subj-group>
         </article-categories>
         <title-group>
            <article-title>Class incremental learning of remote sensing images based on class similarity distillation</article-title>
         </title-group>
         <contrib-group content-type="authors">
            <contrib id="author-1" contrib-type="author">
               <name>
                  <surname>Shen</surname>
                  <given-names>Mingge</given-names>
               </name><xref ref-type="aff" rid="aff-1">1</xref><xref ref-type="aff" rid="aff-2">2</xref></contrib>
            <contrib id="author-2" contrib-type="author" corresp="yes">
               <name>
                  <surname>Chen</surname>
                  <given-names>Dehu</given-names>
               </name>
               <email>chendehu01@163.com</email><xref ref-type="aff" rid="aff-3">3</xref><xref ref-type="aff" rid="aff-4">4</xref></contrib>
            <contrib id="author-3" contrib-type="author">
               <name>
                  <surname>Hu</surname>
                  <given-names>Silan</given-names>
               </name><xref ref-type="aff" rid="aff-5">5</xref></contrib>
            <contrib id="author-4" contrib-type="author">
               <name>
                  <surname>Xu</surname>
                  <given-names>Gang</given-names>
               </name><xref ref-type="aff" rid="aff-1">1</xref><xref ref-type="aff" rid="aff-2">2</xref></contrib>
            <aff id="aff-1"><label>1</label><institution>Zhejiang College of Security Technology, College of Intelligent Equipment</institution>, <city>Wenzhou</city>, <state>Zhejiang</state>, <country>China</country></aff>
            <aff id="aff-2"><label>2</label><institution>Zhejiang College of Security Technology, Wenzhou Key Laboratory of Stereoscopic and Intelligent Monitoring and Warning of Natural Disasters</institution>, <city>Wenzhou</city>, <state>Zhejiang</state>, <country>China</country></aff>
            <aff id="aff-3"><label>3</label><institution>Wenzhou University of Technology, College of Architecture and Energy Engineering</institution>, <city>Wenzhou</city>, <state>Zhejiang</state>, <country>China</country></aff>
            <aff id="aff-4"><label>4</label><institution>Wenzhou University of Technology, Wenzhou Key Laboratory of Intelligent Lifeline Protection and Emergency Technology for Resilient City</institution>, <city>Wenzhou</city>, <state>Zhejiang</state>, <country>China</country></aff>
            <aff id="aff-5"><label>5</label><institution>Macau University of Science and Technology, Faculty of Innovation Engineering</institution>, <city>Macau</city>, <state>Macau</state>, <country>China</country></aff>
         </contrib-group>
         <contrib-group content-type="editors">
            <contrib contrib-type="editor">
               <name>
                  <surname>Sen Gupta</surname>
                  <given-names>Ananya</given-names>
               </name>
            </contrib>
         </contrib-group>
         <pub-date pub-type="epub" date-type="pub" iso-8601-date="2023-09-27">
            <day>27</day>
            <month>9</month>
            <year iso-8601-date="2023">2023</year>
         </pub-date>
         <volume>9</volume>
         <elocation-id>e1583</elocation-id>
         <history>
            <date date-type="received" iso-8601-date="2022-12-21">
               <day>21</day>
               <month>12</month>
               <year iso-8601-date="2022">2022</year>
            </date>
            <date date-type="accepted" iso-8601-date="2023-08-20">
               <day>20</day>
               <month>8</month>
               <year iso-8601-date="2023">2023</year>
            </date>
         </history>
         <permissions>
            <copyright-statement>©2023 Shen et al.</copyright-statement>
            <copyright-year>2023</copyright-year>
            <copyright-holder>Shen et al.</copyright-holder>
            <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
               <license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, reproduction and adaptation in any medium and for any purpose provided that it is properly attributed. For attribution, the original author(s), title, publication source (PeerJ Computer Science) and either DOI or URL of the article must be cited.</license-p>
            </license>
         </permissions>
         <self-uri xlink:href="https://peerj.com/articles/cs-1583"/>
         <abstract>
             <p>When a well-trained model learns a new class, the data distribution differences between the new and old classes inevitably cause catastrophic forgetting of the old classes as the model adapts to perform better on the new class. This behavior differs from human learning. In this article, we propose a class incremental object detection method for remote sensing images to address the problem of catastrophic forgetting caused by distribution differences among different classes. First, we introduce a class similarity distillation (CSD) loss based on the similarity between new and old class prototypes, ensuring the model’s plasticity to learn new classes and stability to detect old classes. Second, to better extract class similarity features, we propose a global similarity distillation (GSD) loss that maximizes the mutual information between the new class feature and old class features. Additionally, we present a region proposal network (RPN)-based method that assigns positive and negative labels to prevent mislearning issues. Experiments demonstrate that our method is more accurate for class incremental learning on public DOTA and DIOR datasets and significantly improves training efficiency compared to state-of-the-art class incremental object detection methods.</p>
         </abstract>
         <kwd-group kwd-group-type="author">
            <kwd>Class incremental learning</kwd>
            <kwd>Class similarity distillation</kwd>
            <kwd>Global similarity distillation</kwd>
            <kwd>Catastrophic forgetting</kwd>
            <kwd>Remote sensing</kwd>
         </kwd-group>
         <funding-group>
            <funding-statement>The authors received no funding for this work.</funding-statement>
         </funding-group>
      </article-meta>
   </front>
   <body>
      <sec sec-type="intro">
         <title>Introduction</title>
         <p>In various industries such as urban planning, security monitoring, outer space exploration, and many others, remote sensing image processing is widely utilized. It has consistently been a focal point in computer vision due to its high resolution, significant differences in object size distribution within images, and varying orientations. In recent years, the development of deep learning technology has enabled some methods to effectively handle small and multi-directional objects (<xref ref-type="bibr" rid="ref-49">Xiaolin et al., 2022</xref>; <xref ref-type="bibr" rid="ref-28">Ming et al., 2021</xref>). However, existing methods do not allow for continuous learning of new classes in a human-like manner. In other words, when the model learns a new class, it must retrain with samples from both the previously learned class and the new class to achieve satisfactory results. Otherwise, the model will experience catastrophic forgetting. This learning process differs from that of humans. Furthermore, storing samples from old classes consumes a considerable amount of storage space.</p>
         <p>For this reason, developing a model that can learn new classes without using old samples and avoid catastrophic forgetting is essential. Some methods attempt to address this issue by updating the parameters of new tasks in the orthogonal space of old tasks from an optimization perspective, thus mitigating forgetting to some extent (<xref ref-type="bibr" rid="ref-16">Kirkpatrick et al., 2017</xref>; <xref ref-type="bibr" rid="ref-21">Li &amp; Hoiem, 2017</xref>). Other methods (<xref ref-type="bibr" rid="ref-33">Rebuffi et al., 2017</xref>; <xref ref-type="bibr" rid="ref-39">Rolnick et al., 2019</xref>) adopt a rehearsal mechanism, similar to human review. When learning new tasks, they include a small number of training samples from old tasks. Distillation (<xref ref-type="bibr" rid="ref-18">Lee et al., 2019</xref>; <xref ref-type="bibr" rid="ref-52">Yang &amp; Cai, 2022</xref>) is widely employed in these methods to ensure the model performs well across all tasks. Yet other methods (<xref ref-type="bibr" rid="ref-16">Kirkpatrick et al., 2017</xref>; <xref ref-type="bibr" rid="ref-26">Mallya &amp; Lazebnik, 2018</xref>; <xref ref-type="bibr" rid="ref-9">Fernando et al., 2017</xref>) are based on the over-parameterized characteristics of deep neural networks, activating or expanding neurons for different tasks. However, these methods lack the utilization of learned knowledge, akin to humans reviewing old knowledge to better learn new knowledge. Furthermore, recent work (<xref ref-type="bibr" rid="ref-45">Simon et al., 2022</xref>) employs Mahalanobis similarity as a learning parameter to learn meaningful features, but it still encounters the issue of linearly increasing the number of parameters as the number of tasks increases. Most existing lifelong learning methods assume that tasks originate from the same distribution, ignoring the more general situation where tasks come from different domains.</p>
          <p>There are also incremental object detection methods designed to address catastrophic forgetting, such as <xref ref-type="bibr" rid="ref-23">Liu et al. (2020a)</xref>, which restricts the updating of weights on new classes based on the importance of the impact of a new class on the model and limits the update of weights on new tasks. A regularization term is introduced to constrain the update of model weights on a new class. With a certain number of neurons added to the model to learn the new class, <xref ref-type="bibr" rid="ref-7">Dong et al. (2021)</xref> and <xref ref-type="bibr" rid="ref-42">Shieh et al. (2020)</xref> ensure that the model learns the new class while maintaining the model’s parameters for the old classes simultaneously. In <xref ref-type="bibr" rid="ref-12">Hao, Fu &amp; Jiang (2019a)</xref>, distillation techniques are employed to ensure that the network model remembers the old classes while learning a new one. <xref ref-type="bibr" rid="ref-42">Shieh et al. (2020)</xref> use a replay-based approach, <italic>i.e.,</italic> storing some representative samples of the old classes, and acquiring new knowledge by using new task samples and the stored old samples. However, there are two main problems with existing methods:</p>
         <list id="list-1" list-type="order">
             <list-item><label> 1.</label><p>The existing methods cannot fully exploit the similarity information among classes as humans can. For instance, a human who has learned to recognize aircraft can learn to detect helicopters faster; a model that has learned to detect aircraft should be able to do the same.</p>
            </list-item>
            <list-item><label> 2.</label><p>With the increase in classes, a larger model, storage and computational costs will be inevitable, and the model’s accuracy will decrease rapidly.</p>
            </list-item>
         </list>
         <p>To deal with the above issues, the main contributions of this article are concluded as follows:</p>
         <list id="list-2" list-type="order">
            <list-item><label> 1.</label><p>Based on class similarity distillation, we propose a method for class incremental object detection, which can dynamically adjust the distillation weights according to the similarity between new and learned classes, <italic>i.e.,</italic> if the new task is more similar to the old class. In that case, the distillation weights on the new class can be increased to enhance the forward transfer ability of the model and vice versa to ensure the unity of model plasticity and stability.</p>
            </list-item>
             <list-item><label> 2.</label><p>By maximizing the mutual information between the new class and the old classes, we propose a global similarity distillation (GSD) loss that maximizes the extraction of similarity information between the new and old classes.</p>
            </list-item>
            <list-item><label> 3.</label><p>The experiments demonstrate that our model can guarantee high accuracy without adding additional storage or computing resources.</p>
            </list-item>
         </list>
         <p>The related work is briefly reviewed in the “Related work” section, and the proposed approach is clarified in the “Methods” section. Experiments and implementation details are provided in the “Results” section to validate our method’s effectiveness using two standard remote sensing datasets. There is further discussion of the article’s shortcomings in the “Discussion” section, and a conclusion is in “Conclusions”.</p>
      </sec>
      <sec>
         <title>Related Work</title>
         <p>In recent years, deep learning-based object detection methods have seen rapid development. Generally, these methods can be classified into two categories: anchor-based, such as the R-CNN series (<xref ref-type="bibr" rid="ref-10">Girshick, 2015</xref>; <xref ref-type="bibr" rid="ref-37">Ren et al., 2015</xref>) and YOLO series (<xref ref-type="bibr" rid="ref-34">Redmon et al., 2016</xref>; <xref ref-type="bibr" rid="ref-35">Redmon &amp; Farhadi, 2017</xref>; <xref ref-type="bibr" rid="ref-36">Redmon &amp; Farhadi, 2018</xref>), and anchor-free, which are not based on preset anchors, such as FCOS (<xref ref-type="bibr" rid="ref-46">Tian et al., 2019</xref>) and DETR (<xref ref-type="bibr" rid="ref-56">Zhu et al., 2020</xref>). Both algorithms are highly accurate in detecting objects, but they cannot handle class incremental learning tasks. In recent years, some class incremental object detection algorithms (<xref ref-type="bibr" rid="ref-51">Yang et al., 2022</xref>; <xref ref-type="bibr" rid="ref-55">Zhang et al., 2021</xref>; <xref ref-type="bibr" rid="ref-47">Ul Haq et al., 2021</xref>) have emerged that can incrementally learn new tasks. These methods are divided into three main categories: parameter isolation-based, replay-based, and regularization-based.</p>
         <p>The first category is the rehearsal-based method, similar to human review. When the model learns new tasks, the impact of old tasks is considered simultaneously, allowing the model to better remember old tasks and avoid catastrophic forgetting. This method widely uses distillation technology, as it can quickly learn new tasks with few samples. The most representative is the ICARL algorithm (<xref ref-type="bibr" rid="ref-33">Rebuffi et al., 2017</xref>), which uses a teacher network and student network to enable all learned tasks to converge quickly with a small number of training samples. Therefore, only a small number of previous task samples need to be stored when learning a new task. To save memory overhead, <xref ref-type="bibr" rid="ref-39">Rolnick et al. (2019)</xref> propose reservoir sampling to limit the number of stored samples to a fixed budget data stream. Continual prototype evolution (CPE) (<xref ref-type="bibr" rid="ref-6">De Lange &amp; Tuytelaars, 2021</xref>) combines the nearest-mean classifier approach with an efficient reservoir-based sampling scheme. More detailed experiments on the rehearsal for lifelong learning are provided in (<xref ref-type="bibr" rid="ref-27">Masana et al., 2020</xref>).</p>
         <p>Compared to directly storing samples, another representative method is GEM (<xref ref-type="bibr" rid="ref-25">Lopez-Paz &amp; Ranzato, 2017</xref>). It stores the gradient of previous tasks instead of training samples, ensuring the direction of the gradient update for new tasks is orthogonal to the previous tasks, reducing interference with prior knowledge. Many methods adopt similar principles. To further save memory space, numerous GAN-based methods are proposed to generate high-quality images and model the data-generating distribution of previous tasks, retraining on generated examples (<xref ref-type="bibr" rid="ref-38">Robins, 1995</xref>; <xref ref-type="bibr" rid="ref-11">Goodfellow et al., 2014</xref>; <xref ref-type="bibr" rid="ref-43">Shin et al., 2017</xref>; <xref ref-type="bibr" rid="ref-53">Ye &amp; Bors, 2021</xref>). Although GAN-based methods reduce storage space, they introduce many additional calculations.</p>
         <p>The second category is the regularization-based method. The main idea of these methods is to add a regularization term of parameter importance, which can reduce the update of essential parameters for old tasks and increase the update of unimportant parameters. To evaluate the importance of parameters, LwF (<xref ref-type="bibr" rid="ref-21">Li &amp; Hoiem, 2017</xref>) limits the update of parameters according to the difference between the new task and the old task. EWC (<xref ref-type="bibr" rid="ref-16">Kirkpatrick et al., 2017</xref>) determines the importance of weight parameters according to the training Fisher information matrix. However, with increased tasks, Fisher regularization will excessively limit the network parameters, resulting in the inability to learn more new tasks. To address this problem, some methods, such as the SI algorithm (<xref ref-type="bibr" rid="ref-54">Zenke, Poole &amp; Ganguli, 2017</xref>), determine the importance of network parameters according to the variation range of network parameters from old tasks to new tasks. However, the parameter update method of random gradient descent often makes the results unstable. In contrast, MAS (<xref ref-type="bibr" rid="ref-2">Aljundi et al., 2018</xref>) allows importance weight estimation to provide datasets without supervision, enabling it to perform user-specific data processing. Variational continuous learning (VCL) (<xref ref-type="bibr" rid="ref-30">Nguyen, Ngo &amp; Nguyen-Xuan, 2017</xref>) uses a variational framework for continuous learning.</p>
         <p>Some Bayesian-based works (<xref ref-type="bibr" rid="ref-1">Ahn et al., 2019</xref>; <xref ref-type="bibr" rid="ref-54">Zenke, Poole &amp; Ganguli, 2017</xref>) estimate the importance of weights online during task training. <xref ref-type="bibr" rid="ref-2">Aljundi et al. (2018)</xref> propose an unsupervised parameter importance evaluation method to increase flexibility and online user adaptability. Further work by <xref ref-type="bibr" rid="ref-17">Lange et al. (2020)</xref> and <xref ref-type="bibr" rid="ref-4">Aljundi, Kelchtermans &amp; Tuytelaars (2019)</xref> extends this method to the case of no task setting. However, these methods are generally difficult to converge.</p>
         <p>The third category is neuron activation or expansion methods, which activate different parameters of the network for different tasks or add additional parameters for new tasks in advance if the deep neural network is over-parameterized. However, the increased number of tasks can easily lead to the saturation of model parameters.</p>
         <p>PackNet (<xref ref-type="bibr" rid="ref-26">Mallya &amp; Lazebnik, 2018</xref>) prunes weights in the network according to the importance of the weights. Only the first 50% of the weight is selected each time to train the current task. HAT (<xref ref-type="bibr" rid="ref-41">Serra et al., 2018</xref>) either freezes previous task parameters or dedicates a model copy to each task when learning new tasks. Alternatively, the architecture remains static, with fixed parts allocated to each task. The previous task parameters are masked during new task training, and each task feature is converted into an embedding. After passing these embeddings, the network converts them into masks. HAT (<xref ref-type="bibr" rid="ref-41">Serra et al., 2018</xref>) takes sparsity as the loss function, which is more intelligent. These works typically require a task oracle, activating corresponding masks or task branches during prediction. Therefore, they are restrained to a multi-head setup, incapable of coping with a shared head between tasks. Expert gate (<xref ref-type="bibr" rid="ref-3">Aljundi, Chakravarty &amp; Tuytelaars, 2017</xref>) avoids this problem by learning an auto-encoder gate.</p>
         <p>Compared to fixed network weight numbers, there are also some methods such as progressive networks (<xref ref-type="bibr" rid="ref-40">Rusu et al., 2016</xref>), dynamic memory networks (<xref ref-type="bibr" rid="ref-32">Perkonigg et al., 2021</xref>), and DER (<xref ref-type="bibr" rid="ref-50">Yan, Xie &amp; He, 2021</xref>) that increase the network structure. Whenever a new task is performed, appropriate neurons are added to train the new task. However, these methods cannot be used for large-scale task learning due to the limitation of the number of parameters.</p>
          <p>In recent years, several works in remote sensing have applied the above incremental object detection methods to optical remote sensing, SAR, and hyperspectral images, and have achieved some promising results. Although remote sensing image object detection is a complex task, studies have yet to be conducted on class incremental object detection owing to its high complexity; existing detectors are typically improved by acquiring new samples from old classes rather than by adapting to unseen new classes. One line of work proposes a class incremental learning method based on multiscale features to detect objects in more than one direction. <xref ref-type="bibr" rid="ref-7">Dong et al. (2021)</xref> proposed a class incremental learning method that combines a teacher-student structure and selective distillation.</p>
         <p>In <xref ref-type="bibr" rid="ref-19">Li et al. (2022)</xref>, a Rank-aware Instance Incremental Learning (RAIL) method, based on the notion of a rank-aware instance incremental learning measure, is proposed. RAIL considers the differences between learning values in data learning order and training loss weights. Rank scores are then used to weigh the training losses to balance the learning contributions. However, existing research on continual object detection is still in its early stages, and current approaches primarily fall into two main categories: experience replay (<xref ref-type="bibr" rid="ref-14">Joseph et al., 2021a</xref>) and knowledge distillation (<xref ref-type="bibr" rid="ref-24">Liu et al., 2020b</xref>; <xref ref-type="bibr" rid="ref-44">Shmelkov, Schmid &amp; Alahari, 2017</xref>). <xref ref-type="bibr" rid="ref-14">Joseph et al. (2021a)</xref> stores representative examples in memory, allowing them to be trained alongside new category samples and fine-tuning the model. <xref ref-type="bibr" rid="ref-44">Shmelkov, Schmid &amp; Alahari (2017)</xref> employs knowledge distillation for both object localization and classification. <xref ref-type="bibr" rid="ref-24">Liu et al. (2020b)</xref> further utilizes attentive feature distillation to extract essential knowledge through both top-down and bottom-up attention mechanisms.</p>
          <p>However, when the distribution of the new class is very different from the distribution of the old class, existing methods based on knowledge distillation cannot effectively learn the information of the new class. Furthermore, even though complex models can be used to increase the detection accuracy of individual tasks, doing so is detrimental to knowledge distillation. In human learning, by contrast, efficiency increases as more knowledge is accumulated, since humans can use learned similarity information to speed up learning.</p>
         <p>Inspired by human learning behavior, we propose a new method to continuously detect objects in remote sensing images by considering the similarity and differences between new and old classes by utilizing knowledge distillation to its fullest extent. As a result, the efficiency of the model can improve as more knowledge is learned.</p>
      </sec>
      <sec sec-type="methods">
         <title>Methods</title>
         <p>Our proposed class incremental object detection framework is shown in <xref ref-type="fig" rid="fig-1">Fig. 1</xref>. We use the Faster R-CNN detection framework with the backbone of the feature pyramid network (FPN) (<xref ref-type="bibr" rid="ref-22">Lin et al., 2017</xref>). To maximize the similarity between learning tasks, we use class similarity distillation (CSD) loss at the block-wise level and Global Similarity Distillation loss at the instance level. In addition, we use an RPN-based method to assign positive and negative labels to prevent the mislearning problem caused by the new class being taught against the background of the previous class.</p>
         <fig id="fig-1">
            <object-id pub-id-type="doi">10.7717/peerjcs.1583/fig-1</object-id><label>Figure 1</label><caption>
                <title>The framework of the proposed method. We use the Faster R-CNN detection framework with the backbone of FPN.</title>
               <p>To maximize the similarity between learning tasks, we use class similarity distillation (CSD) loss in the block-wise level and global similarity distillation loss in the instance level.</p>
            </caption>
            <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/fig-1.png"/>
         </fig>
         <sec>
            <title>Problem setting</title>
             <p>Our class incremental learning setup is as follows: given an object detector that has been trained on <italic>C</italic> classes, when a new class <italic>C</italic><sub><italic>n</italic></sub> comes, we are given a dataset <italic>D</italic><sub><italic>n</italic></sub>, which comprises a set of pairs (<italic>X</italic><sub><italic>n</italic></sub>, <italic>Y</italic><sub><italic>n</italic></sub>), where <italic>X</italic><sub><italic>n</italic></sub> is an image of size H × W and <italic>Y</italic><sub><italic>n</italic></sub> is the ground-truth. Here, <italic>Y</italic><sub><italic>n</italic></sub> only consists of labels in current classes <italic>C</italic><sub><italic>n</italic></sub>. The model should be able to predict all classes <italic>C</italic><sub>1</sub>: <italic>C</italic><sub><italic>n</italic></sub> in the history.</p>
         </sec>
         <sec>
            <title>Class similarity distillation</title>
             <p>The detail of the proposed CSD is shown in <xref ref-type="fig" rid="fig-2">Fig. 2</xref>. When learning a new class, we train the new model using the new class samples and labels, consider the output of new samples in the old model, and ensure that the new model avoids catastrophic forgetting. In order to avoid the instability caused by large models, we use the CSD at the block level. The proposed CSD can make better use of similarity information. After each block, we use the weighted distillation loss to decide the degree of distillation according to the similarity between the new class and the old classes, <italic>i.e.,</italic> if the old classes are more similar to the new classes, then the weights are small in the distillation process of the new class, and vice versa.</p>
            <fig id="fig-2">
               <object-id pub-id-type="doi">10.7717/peerjcs.1583/fig-2</object-id><label>Figure 2</label><caption>
                  <title>The detail of proposed class similarity distillation (CSD).</title>
               </caption>
               <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/fig-2.png"/>
            </fig>
             <p>We first obtain the prototype of new class k by computing an in-batch average shown in <xref ref-type="fig" rid="fig-2">Fig. 2</xref> on <italic>Z</italic> = <italic>R</italic><sup><italic>H</italic>×<italic>W</italic>×<italic>C</italic></sup>. Given a batch of feature maps <italic>B</italic> = <italic>R</italic><sup><italic>B</italic>×<italic>H</italic>×<italic>W</italic>×<italic>C</italic></sup>, we flatten the batch, height and width dimensions and index them as <italic>z</italic><sub><italic>i</italic></sub>, where <italic>i</italic> = 1, …, <italic>BHW</italic>. The centroid of class <italic>k</italic> is computed as <xref ref-type="disp-formula" rid="eqn-1">Eq. (1)</xref> <disp-formula id="eqn-1"><label>(1)</label><alternatives>
                     <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/eqn-1.png"/>
                      <tex-math id="tex-eqn-1"><![CDATA[\begin{eqnarray}{p}_{k}= \frac{\sum _{i=1}^{BHW}{z}_{i}1 \left[ {y}_{i}=k \right] }{\sum _{i=1}^{BHW}1 \left[ {y}_{i}=k \right] } \end{eqnarray}]]></tex-math>
                      <mml:math id="mml-eqn-1">
                         <mml:mstyle displaystyle="true">
                            <mml:msub>
                               <mml:mrow>
                                  <mml:mi>p</mml:mi>
                               </mml:mrow>
                               <mml:mrow>
                                  <mml:mi>k</mml:mi>
                               </mml:mrow>
                            </mml:msub>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                               <mml:mrow>
                                  <mml:munderover>
                                     <mml:mrow>
                                        <mml:mo mathsize="big" movablelimits="false">∑</mml:mo>
                                     </mml:mrow>
                                     <mml:mrow>
                                        <mml:mi>i</mml:mi>
                                        <mml:mo>=</mml:mo>
                                        <mml:mn>1</mml:mn>
                                     </mml:mrow>
                                     <mml:mrow>
                                        <mml:mi>B</mml:mi>
                                        <mml:mi>H</mml:mi>
                                        <mml:mi>W</mml:mi>
                                     </mml:mrow>
                                  </mml:munderover>
                                  <mml:msub>
                                     <mml:mrow>
                                        <mml:mi>z</mml:mi>
                                     </mml:mrow>
                                     <mml:mrow>
                                        <mml:mi>i</mml:mi>
                                     </mml:mrow>
                                  </mml:msub>
                                  <mml:mn>1</mml:mn>
                                  <mml:mfenced separators="" open="[" close="]">
                                     <mml:mrow>
                                        <mml:msub>
                                           <mml:mrow>
                                              <mml:mi>y</mml:mi>
                                           </mml:mrow>
                                           <mml:mrow>
                                              <mml:mi>i</mml:mi>
                                           </mml:mrow>
                                        </mml:msub>
                                        <mml:mo>=</mml:mo>
                                        <mml:mi>k</mml:mi>
                                     </mml:mrow>
                                  </mml:mfenced>
                               </mml:mrow>
                               <mml:mrow>
                                  <mml:munderover>
                                     <mml:mrow>
                                        <mml:mo mathsize="big" movablelimits="false">∑</mml:mo>
                                     </mml:mrow>
                                     <mml:mrow>
                                        <mml:mi>i</mml:mi>
                                        <mml:mo>=</mml:mo>
                                        <mml:mn>1</mml:mn>
                                     </mml:mrow>
                                     <mml:mrow>
                                        <mml:mi>B</mml:mi>
                                        <mml:mi>H</mml:mi>
                                        <mml:mi>W</mml:mi>
                                     </mml:mrow>
                                  </mml:munderover>
                                  <mml:mn>1</mml:mn>
                                  <mml:mfenced separators="" open="[" close="]">
                                     <mml:mrow>
                                        <mml:msub>
                                           <mml:mrow>
                                              <mml:mi>y</mml:mi>
                                           </mml:mrow>
                                           <mml:mrow>
                                              <mml:mi>i</mml:mi>
                                           </mml:mrow>
                                        </mml:msub>
                                        <mml:mo>=</mml:mo>
                                        <mml:mi>k</mml:mi>
                                     </mml:mrow>
                                  </mml:mfenced>
                               </mml:mrow>
                           </mml:mfrac>
                        </mml:mstyle>
                     </mml:math>
                  </alternatives>
                </disp-formula>where 1[<italic>y</italic><sub><italic>i</italic></sub> = <italic>k</italic>] = 1 if the label <italic>y</italic><sub><italic>i</italic></sub> is <italic>k</italic>, otherwise 1[<italic>y</italic><sub><italic>i</italic></sub> = <italic>k</italic>] = 0. The cumulative prototypes <italic>P</italic><sub>1</sub>:<italic>P</italic><sub><italic>k</italic></sub> of all classes from class 1 to class <italic>k</italic> are computed at the end of class <italic>k</italic>.</p>
            <p>We construct a prototype map <italic>m</italic><sub><italic>x</italic></sub> = <italic>R</italic><sup><italic>H</italic>×<italic>W</italic>×<italic>C</italic></sup> where each pixel <italic>x</italic> contains a prototype vector <italic>m</italic><sub><italic>x</italic></sub> = <italic>p</italic><sub><italic>k</italic></sub>. Then we compute a similarity map <italic>S</italic> = <italic>R</italic><sup><italic>H</italic>×<italic>W</italic>×<italic>k</italic></sup> between the prototype <italic>m</italic><sub><italic>x</italic></sub> of a new class in each pixel <italic>x</italic> and the prototype <italic>p</italic><sub><italic>k</italic></sub> of each old class. Each entry <italic>S</italic><sub>(<italic>x</italic>,<italic>k</italic>)</sub> is the cosine similarity between <italic>m</italic><sub><italic>x</italic></sub> and <italic>p</italic><sub><italic>k</italic></sub>; the normalized similarity map <italic>S</italic> is defined as <disp-formula id="eqn-2"><label>(2)</label><alternatives>
                     <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/eqn-2.png"/>
                     <tex-math id="tex-eqn-2"><![CDATA[\begin{eqnarray}S= \frac{\exp \nolimits \left( \frac{{m}_{x}\cdot {p}_{k}^{t-1}}{ \left\| {m}_{x} \right\| \cdot \left\| {p}_{k}^{t-1} \right\| } \right) }{\sum _{j=1}^{k}\exp \nolimits \left( \frac{{m}_{x}\cdot {p}_{j}^{t-1}}{ \left\| {m}_{x} \right\| \cdot \left\| {p}_{j}^{t-1} \right\| } \right) } .\end{eqnarray}]]></tex-math>
                     <mml:math id="mml-eqn-2">
                        <mml:mstyle displaystyle="true">
                           <mml:mi>S</mml:mi>
                           <mml:mo>=</mml:mo>
                           <mml:mfrac>
                              <mml:mrow>
                                 <mml:mo class="qopname">exp</mml:mo>
                                 <mml:mfenced separators="" open="(" close=")">
                                    <mml:mrow>
                                       <mml:mfrac>
                                          <mml:mrow>
                                             <mml:msub>
                                                <mml:mrow>
                                                   <mml:mi>m</mml:mi>
                                                </mml:mrow>
                                                <mml:mrow>
                                                   <mml:mi>x</mml:mi>
                                                </mml:mrow>
                                             </mml:msub>
                                             <mml:mo>⋅</mml:mo>
                                             <mml:msubsup>
                                                <mml:mrow>
                                                   <mml:mi>p</mml:mi>
                                                </mml:mrow>
                                                <mml:mrow>
                                                   <mml:mi>k</mml:mi>
                                                </mml:mrow>
                                                <mml:mrow>
                                                   <mml:mi>t</mml:mi>
                                                   <mml:mo>−</mml:mo>
                                                   <mml:mn>1</mml:mn>
                                                </mml:mrow>
                                             </mml:msubsup>
                                          </mml:mrow>
                                          <mml:mrow>
                                             <mml:mfenced separators="" open="∥" close="∥">
                                                <mml:mrow>
                                                   <mml:msub>
                                                      <mml:mrow>
                                                         <mml:mi>m</mml:mi>
                                                      </mml:mrow>
                                                      <mml:mrow>
                                                         <mml:mi>x</mml:mi>
                                                      </mml:mrow>
                                                   </mml:msub>
                                                </mml:mrow>
                                             </mml:mfenced>
                                             <mml:mo>⋅</mml:mo>
                                             <mml:mfenced separators="" open="∥" close="∥">
                                                <mml:mrow>
                                                   <mml:msubsup>
                                                      <mml:mrow>
                                                         <mml:mi>p</mml:mi>
                                                      </mml:mrow>
                                                      <mml:mrow>
                                                         <mml:mi>k</mml:mi>
                                                      </mml:mrow>
                                                      <mml:mrow>
                                                         <mml:mi>t</mml:mi>
                                                         <mml:mo>−</mml:mo>
                                                         <mml:mn>1</mml:mn>
                                                      </mml:mrow>
                                                   </mml:msubsup>
                                                </mml:mrow>
                                             </mml:mfenced>
                                          </mml:mrow>
                                       </mml:mfrac>
                                    </mml:mrow>
                                 </mml:mfenced>
                              </mml:mrow>
                              <mml:mrow>
                                 <mml:munderover>
                                    <mml:mrow>
                                       <mml:mo mathsize="big" movablelimits="false"> ∑</mml:mo>
                                    </mml:mrow>
                                    <mml:mrow>
                                       <mml:mi>j</mml:mi>
                                       <mml:mo>=</mml:mo>
                                       <mml:mn>1</mml:mn>
                                    </mml:mrow>
                                    <mml:mrow>
                                       <mml:mi>k</mml:mi>
                                    </mml:mrow>
                                 </mml:munderover>
                                 <mml:mo class="qopname"> exp</mml:mo>
                                 <mml:mfenced separators="" open="(" close=")">
                                    <mml:mrow>
                                       <mml:mfrac>
                                          <mml:mrow>
                                             <mml:msub>
                                                <mml:mrow>
                                                   <mml:mi>m</mml:mi>
                                                </mml:mrow>
                                                <mml:mrow>
                                                   <mml:mi>x</mml:mi>
                                                </mml:mrow>
                                             </mml:msub>
                                             <mml:mo>⋅</mml:mo>
                                             <mml:msubsup>
                                                <mml:mrow>
                                                   <mml:mi>p</mml:mi>
                                                </mml:mrow>
                                                <mml:mrow>
                                                   <mml:mi>j</mml:mi>
                                                </mml:mrow>
                                                <mml:mrow>
                                                   <mml:mi>t</mml:mi>
                                                   <mml:mo>−</mml:mo>
                                                   <mml:mn>1</mml:mn>
                                                </mml:mrow>
                                             </mml:msubsup>
                                          </mml:mrow>
                                          <mml:mrow>
                                             <mml:mfenced separators="" open="∥" close="∥">
                                                <mml:mrow>
                                                   <mml:msub>
                                                      <mml:mrow>
                                                         <mml:mi>m</mml:mi>
                                                      </mml:mrow>
                                                      <mml:mrow>
                                                         <mml:mi>x</mml:mi>
                                                      </mml:mrow>
                                                   </mml:msub>
                                                </mml:mrow>
                                             </mml:mfenced>
                                             <mml:mo>⋅</mml:mo>
                                             <mml:mfenced separators="" open="∥" close="∥">
                                                <mml:mrow>
                                                   <mml:msubsup>
                                                      <mml:mrow>
                                                         <mml:mi>p</mml:mi>
                                                      </mml:mrow>
                                                      <mml:mrow>
                                                         <mml:mi>j</mml:mi>
                                                      </mml:mrow>
                                                      <mml:mrow>
                                                         <mml:mi>t</mml:mi>
                                                         <mml:mo>−</mml:mo>
                                                         <mml:mn>1</mml:mn>
                                                      </mml:mrow>
                                                   </mml:msubsup>
                                                </mml:mrow>
                                             </mml:mfenced>
                                          </mml:mrow>
                                       </mml:mfrac>
                                    </mml:mrow>
                                 </mml:mfenced>
                              </mml:mrow>
                           </mml:mfrac>
                           <mml:mo>.</mml:mo>
                        </mml:mstyle>
                     </mml:math>
                  </alternatives>
               </disp-formula>
            </p>
            <p>Finally, the class similarity distillation loss distills the weighted outputs of the old model and the new model: <disp-formula id="eqn-3"><label>(3)</label><alternatives>
                     <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/eqn-3.png"/>
                     <tex-math id="tex-eqn-3"><![CDATA[\begin{eqnarray}Ls(k,x)=\sum _{k=1}^{m}S(x-k)^{2}\end{eqnarray}]]></tex-math>
                     <mml:math id="mml-eqn-3">
                        <mml:mstyle displaystyle="true">
                           <mml:mi>L</mml:mi>
                           <mml:mi>s</mml:mi>
                           <mml:mrow><mml:mfenced separators="" open="(" close=")"><mml:mi>k</mml:mi>
                              <mml:mo>,</mml:mo>
                              <mml:mi>x</mml:mi></mml:mfenced></mml:mrow>
                           <mml:mo>=</mml:mo>
                           <mml:munderover>
                              <mml:mrow>
                                 <mml:mo mathsize="big" movablelimits="false"> ∑</mml:mo>
                              </mml:mrow>
                              <mml:mrow>
                                 <mml:mi>k</mml:mi>
                                 <mml:mo>=</mml:mo>
                                 <mml:mn>1</mml:mn>
                              </mml:mrow>
                              <mml:mrow>
                                 <mml:mi>m</mml:mi>
                              </mml:mrow>
                           </mml:munderover>
                           <mml:mi>S</mml:mi>
                           <mml:msup>
                              <mml:mrow>
                                 <mml:mrow><mml:mfenced separators="" open="(" close=")"><mml:mi>x</mml:mi>
                                    <mml:mo>−</mml:mo>
                                    <mml:mi>k</mml:mi></mml:mfenced></mml:mrow>
                              </mml:mrow>
                              <mml:mrow>
                                 <mml:mn>2</mml:mn>
                              </mml:mrow>
                           </mml:msup>
                        </mml:mstyle>
                     </mml:math>
                  </alternatives>
                </disp-formula>where <italic>k</italic> and <italic>x</italic> denote the old class and the new class features, respectively.</p>
            <p>Learning this similarity provides two benefits. First, the model can relate the new class to what it had previously learned, which facilitates the transfer of the old knowledge to the new class for better learning. Second, it encourages the model to learn the underlying class hierarchy implicitly. We do not need to save the class ID and only save the prototype when the new classes are well trained, so that we can learn the similarity of the new class more quickly.</p>
         </sec>
         <sec>
            <title>Global similarity distillation</title>
            <p>In order to maximize the extraction of correlation features of different class objects in remote sensing images, we propose the global similarity loss (GBL) to maximize the similarity information of the old classes and the new classes by maximizing the mutual information at the instance level before the classification and regression results. The GBL is shown in <xref ref-type="disp-formula" rid="eqn-4">Eq. (4)</xref> <disp-formula id="eqn-4"><label>(4)</label><alternatives>
                     <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/eqn-4.png"/>
                     <tex-math id="tex-eqn-4"><![CDATA[\begin{eqnarray}{L}_{g}= \frac{S \left( {x}_{t},{y}_{t} \right) }{\sum _{j=1}^{k}S \left( {x}_{j},{y}_{t} \right) } \end{eqnarray}]]></tex-math>
                     <mml:math id="mml-eqn-4">
                        <mml:mstyle displaystyle="true">
                           <mml:msub>
                              <mml:mrow>
                                 <mml:mi>L</mml:mi>
                              </mml:mrow>
                              <mml:mrow>
                                 <mml:mi>g</mml:mi>
                              </mml:mrow>
                           </mml:msub>
                           <mml:mo>=</mml:mo>
                           <mml:mfrac>
                              <mml:mrow>
                                 <mml:mi>S</mml:mi>
                                 <mml:mfenced separators="" open="(" close=")">
                                    <mml:mrow>
                                       <mml:msub>
                                          <mml:mrow>
                                             <mml:mi>x</mml:mi>
                                          </mml:mrow>
                                          <mml:mrow>
                                             <mml:mi>t</mml:mi>
                                          </mml:mrow>
                                       </mml:msub>
                                       <mml:mo>,</mml:mo>
                                       <mml:msub>
                                          <mml:mrow>
                                             <mml:mi>y</mml:mi>
                                          </mml:mrow>
                                          <mml:mrow>
                                             <mml:mi>t</mml:mi>
                                          </mml:mrow>
                                       </mml:msub>
                                    </mml:mrow>
                                 </mml:mfenced>
                              </mml:mrow>
                              <mml:mrow>
                                 <mml:munderover>
                                    <mml:mrow>
                                       <mml:mo mathsize="big" movablelimits="false"> ∑</mml:mo>
                                    </mml:mrow>
                                    <mml:mrow>
                                       <mml:mi>j</mml:mi>
                                       <mml:mo>=</mml:mo>
                                       <mml:mn>1</mml:mn>
                                    </mml:mrow>
                                    <mml:mrow>
                                       <mml:mi>k</mml:mi>
                                    </mml:mrow>
                                 </mml:munderover>
                                 <mml:mi>S</mml:mi>
                                 <mml:mfenced separators="" open="(" close=")">
                                    <mml:mrow>
                                       <mml:msub>
                                          <mml:mrow>
                                             <mml:mi>x</mml:mi>
                                          </mml:mrow>
                                          <mml:mrow>
                                             <mml:mi>j</mml:mi>
                                          </mml:mrow>
                                       </mml:msub>
                                       <mml:mo>,</mml:mo>
                                       <mml:msub>
                                          <mml:mrow>
                                             <mml:mi>y</mml:mi>
                                          </mml:mrow>
                                          <mml:mrow>
                                             <mml:mi>t</mml:mi>
                                          </mml:mrow>
                                       </mml:msub>
                                    </mml:mrow>
                                 </mml:mfenced>
                              </mml:mrow>
                           </mml:mfrac>
                        </mml:mstyle>
                     </mml:math>
                  </alternatives>
                </disp-formula>where <italic>x</italic><sub><italic>t</italic></sub> is the old instance-level class feature, <italic>y</italic><sub><italic>t</italic></sub> is the current instance-level class feature, <italic>x</italic><sub><italic>j</italic></sub> is a noisy old class feature, and <italic>S</italic>() is the cosine similarity. Maximizing this equation is equivalent to maximizing the relationship between the learned and unlearned classes discriminated by the model, and maximizing the mutual information of the new class and the old classes.</p>
         </sec>
         <sec>
            <title>Positive and negative samples assignment based on RPN</title>
            <p>In general, the way <xref ref-type="bibr" rid="ref-37">Ren et al. (2015)</xref> assigns positive and negative labels for training samples in remote sensing datasets is based on the size of anchors. Some datasets contain multiple class samples simultaneously. Thus, some unknown class positive samples are labeled as new class negative samples, leading to decreased efficiency and accuracy in learning these samples.</p>
            <p>To solve this problem, we propose an RPN-based technique for assigning positive and negative samples to label potential new classes. Specifically, these new classes will be designated as unknown samples, which means they will not be included in the training of positive and negative samples, thereby avoiding the problem of new tasks appearing in old tasks, which would result in inadequate training.</p>
             <p>Firstly, based on the characteristics of the region proposal network (RPN), which can output the class probability scores and the bounding boxes of almost all objects, our approach is to treat those objects that have higher objectness scores but do not have higher IoU with the ground truth as potential unknown objects that should not be included in the training of the positive and negative samples. Specifically, a negative sample is defined as “1” where the probability score ranking of the last k objects is less than a certain threshold, and at the same time, the IoU of the ground-truth is less than a certain threshold.</p>
         </sec>
         <sec>
            <title>Loss function</title>
            <p>The loss function of the entire framework is shown as <xref ref-type="disp-formula" rid="eqn-5">Eq. (5)</xref>. <disp-formula id="eqn-5"><label>(5)</label><alternatives>
                     <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/eqn-5.png"/>
                     <tex-math id="tex-eqn-5"><![CDATA[\begin{eqnarray}L={L}_{det}+a{L}_{s}+b{L}_{g}.\end{eqnarray}]]></tex-math>
                     <mml:math id="mml-eqn-5">
                        <mml:mstyle displaystyle="true">
                           <mml:mi>L</mml:mi>
                           <mml:mo>=</mml:mo>
                           <mml:msub>
                              <mml:mrow>
                                 <mml:mi>L</mml:mi>
                              </mml:mrow>
                              <mml:mrow>
                                 <mml:mi>d</mml:mi>
                                 <mml:mi>e</mml:mi>
                                 <mml:mi>t</mml:mi>
                              </mml:mrow>
                           </mml:msub>
                           <mml:mo>+</mml:mo>
                           <mml:mi>a</mml:mi>
                           <mml:msub>
                              <mml:mrow>
                                 <mml:mi>L</mml:mi>
                              </mml:mrow>
                              <mml:mrow>
                                 <mml:mi>s</mml:mi>
                              </mml:mrow>
                           </mml:msub>
                           <mml:mo>+</mml:mo>
                           <mml:mi>b</mml:mi>
                           <mml:msub>
                              <mml:mrow>
                                 <mml:mi>L</mml:mi>
                              </mml:mrow>
                              <mml:mrow>
                                 <mml:mi>g</mml:mi>
                              </mml:mrow>
                           </mml:msub>
                           <mml:mo>.</mml:mo>
                        </mml:mstyle>
                     </mml:math>
                  </alternatives>
               </disp-formula>
            </p>
             <p>The first term is the Faster R-CNN detection loss function, the second term is the proposed class similarity distillation loss, and the third is the global similarity loss. We use gradient descent with momentum to optimize the model. During the training period, we first fix the other parameters and train the parameters of the new-class RPN branches until they converge, and then we train all the parameters. The results prove the effectiveness of the training method.</p>
         </sec>
      </sec>
      <sec sec-type="results">
         <title>Results</title>
          <p>We used two public remote sensing datasets, DOTA (<xref ref-type="bibr" rid="ref-48">Xia et al., 2018</xref>) and DIOR (<xref ref-type="bibr" rid="ref-20">Li et al., 2020</xref>), to verify the effectiveness of the proposed method; first, we compared with some State-of-the-Art (SOTA) methods, and then we conducted an ablation study to verify the effectiveness of the proposed two distillation loss functions. The specific training parameters were set as follows: we cropped the images to 800×800 size, the batch size was set to 2, the momentum was set to 0.9, the number of iterations was set to 50,000, the initial learning rate was set to 0.0025 and was reduced to one-tenth of its value every 10,000 iterations, a detection was marked as correct when its IoU was greater than 0.7, the RPN output was 128 for both positive and negative samples, and the experiments all used horizontal bounding boxes.</p>
         <p>There are 2,826 images in the DOTA dataset and 188,282 instances with image sizes ranging from 800×800 to 4000×4000, containing 15 classes, and we use the first eight classes as old classes. We incrementally learn the other seven classes.</p>
          <p>There are 11,738 images in the DIOR dataset, and 20 classes contain 190,288 instances. We set the first ten classes as old classes and the last ten classes as new classes.</p>
         <sec>
            <title>Evaluation criteria</title>
            <p>To obtain a generic model performance estimate, after training task t, we compute the average accuracy (AA) on all testing datasets of tasks T. The average accuracy is defined as <xref ref-type="disp-formula" rid="eqn-6">Eq. (6)</xref>. The higher the average accuracy, the better the performance of the model. <disp-formula id="eqn-6"><label>(6)</label><alternatives>
                     <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/eqn-6.png"/>
                     <tex-math id="tex-eqn-6"><![CDATA[\begin{eqnarray}AA= \frac{1}{T} \sum _{t=1}^{T}( \frac{{\mathrm{TP}}_{t}+{\mathrm{TN}}_{t}}{{\mathrm{P}}_{t}+{\mathrm{N}}_{t}} )\times 100\end{eqnarray}]]></tex-math>
                     <mml:math id="mml-eqn-6">
                        <mml:mstyle displaystyle="true">
                           <mml:mi>A</mml:mi>
                           <mml:mi>A</mml:mi>
                           <mml:mo>=</mml:mo>
                           <mml:mfrac>
                              <mml:mrow>
                                 <mml:mn>1</mml:mn>
                              </mml:mrow>
                              <mml:mrow>
                                 <mml:mi>T</mml:mi>
                              </mml:mrow>
                           </mml:mfrac>
                           <mml:munderover>
                              <mml:mrow>
                                 <mml:mo mathsize="big" movablelimits="false">∑</mml:mo>
                              </mml:mrow>
                              <mml:mrow>
                                 <mml:mi>t</mml:mi>
                                 <mml:mo>=</mml:mo>
                                 <mml:mn>1</mml:mn>
                              </mml:mrow>
                              <mml:mrow>
                                 <mml:mi>T</mml:mi>
                              </mml:mrow>
                           </mml:munderover>
                           <mml:mrow><mml:mfenced separators="" open="(" close=")"><mml:mfrac>
                                 <mml:mrow>
                                    <mml:msub>
                                       <mml:mrow>
                                          <mml:mi mathvariant="normal">TP</mml:mi>
                                       </mml:mrow>
                                       <mml:mrow>
                                          <mml:mi>t</mml:mi>
                                       </mml:mrow>
                                    </mml:msub>
                                    <mml:mo>+</mml:mo>
                                    <mml:msub>
                                       <mml:mrow>
                                          <mml:mi mathvariant="normal">TN</mml:mi>
                                       </mml:mrow>
                                       <mml:mrow>
                                          <mml:mi>t</mml:mi>
                                       </mml:mrow>
                                    </mml:msub>
                                 </mml:mrow>
                                 <mml:mrow>
                                    <mml:msub>
                                       <mml:mrow>
                                          <mml:mi mathvariant="normal">P</mml:mi>
                                       </mml:mrow>
                                       <mml:mrow>
                                          <mml:mi>t</mml:mi>
                                       </mml:mrow>
                                    </mml:msub>
                                    <mml:mo>+</mml:mo>
                                    <mml:msub>
                                       <mml:mrow>
                                          <mml:mi mathvariant="normal">N</mml:mi>
                                       </mml:mrow>
                                       <mml:mrow>
                                          <mml:mi>t</mml:mi>
                                       </mml:mrow>
                                    </mml:msub>
                                 </mml:mrow>
                              </mml:mfrac></mml:mfenced></mml:mrow>
                           <mml:mo>×</mml:mo>
                           <mml:mn>100</mml:mn>
                        </mml:mstyle>
                     </mml:math>
                  </alternatives>
                </disp-formula>where TP<sub><italic>t</italic></sub> and TN<sub><italic>t</italic></sub> are the numbers of correctly classified positive and negative samples, respectively. <italic>P</italic><sub><italic>t</italic></sub> and <italic>N</italic><sub><italic>t</italic></sub> are the number of positive and negative samples for task <italic>t</italic>. <italic>T</italic> is the total number of tasks.</p>
         </sec>
         <sec>
            <title>Performance evaluation</title>
            <p>We used ResNet as the uniform backbone, and it can be seen from the AA on both datasets in <xref ref-type="table" rid="table-1">Table 1</xref> that the proposed method improves by 5% compared with the SOTA method FPN-IL (<xref ref-type="bibr" rid="ref-5">Chen et al., 2020</xref>). This is because our method can consider the old class features when learning new classes, thus obtaining a higher AA. Other methods use traditional methods to generate class agnostic RoI or use the dispersion of features before RPN to learn new knowledge and do not fully use the new class information of similarity, so the detection results are unsatisfactory.</p>
            <table-wrap id="table-1">
               <object-id pub-id-type="doi">10.7717/peerjcs.1583/table-1</object-id><label>Table 1</label><caption>
                   <title>The detection results (AA%) of all compared methods.</title>
               </caption>
               <alternatives>
                  <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/table-1.png"/>
                  <table>
                     <colgroup>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                     </colgroup>
                     <thead>
                        <tr>
                           <th>Method</th>
                            <th>Basic Architecture</th>
                            <th>Learning Strategy</th>
                           <th align="center" colspan="2">DOTA AA%</th>
                           <th align="center" colspan="2">DIOR AA%</th>
                        </tr>
                        <tr>
                           <th/>
                           <th/>
                           <th/>
                           <th>Old 8</th>
                           <th>New 7</th>
                           <th>Old 10</th>
                           <th>New 10</th>
                        </tr>
                     </thead>
                     <tbody>
                        <tr>
                           <td rowspan="2">Fast-IL</td>
                           <td rowspan="16">ResNet</td>
                           <td>Incremental</td>
                           <td>26</td>
                           <td>13</td>
                           <td>31</td>
                           <td>19.4</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>26.8</td>
                           <td>22.6</td>
                           <td>33.5</td>
                           <td>34.1</td>
                        </tr>
                        <tr>
                           <td rowspan="2">Faster-IL</td>
                           <td/>
                           <td>Incremental</td>
                           <td>36.1</td>
                           <td>26.4</td>
                           <td>36.7</td>
                           <td>47.0</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>41.5</td>
                           <td>26.9</td>
                           <td>47.4</td>
                           <td>47.7</td>
                        </tr>
                        <tr>
                            <td rowspan="2">FPN-IL</td>
                           <td/>
                           <td>Incremental</td>
                           <td>69.2</td>
                           <td>60.7</td>
                           <td>68.8</td>
                           <td>68.1</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>69.8</td>
                           <td>60.8</td>
                           <td>69.4</td>
                           <td>71.3</td>
                        </tr>
                        <tr>
                           <td rowspan="2">Meta-ILOD</td>
                           <td/>
                           <td>Incremental</td>
                           <td>70.1</td>
                           <td>61.6</td>
                           <td>69.7</td>
                           <td>68.1</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>69.8</td>
                           <td>70.5</td>
                           <td>69.9</td>
                           <td>72.3</td>
                        </tr>
                        <tr>
                           <td rowspan="2">SID</td>
                           <td/>
                           <td>Incremental</td>
                           <td>69.6</td>
                           <td>61.3</td>
                           <td>72.1</td>
                           <td>70.9</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>70.3</td>
                           <td>71.9</td>
                           <td>70.3</td>
                           <td>72.1</td>
                        </tr>
                        <tr>
                           <td rowspan="2">ORE</td>
                           <td/>
                           <td>Incremental</td>
                           <td>69.8</td>
                           <td>61.6</td>
                           <td>69.7</td>
                           <td>71.1</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>70.9</td>
                           <td>72.4</td>
                           <td>70.5</td>
                           <td>72.2</td>
                        </tr>
                        <tr>
                           <td rowspan="2">CWSD</td>
                           <td/>
                           <td>Incremental</td>
                           <td>68.7</td>
                           <td>60.5</td>
                           <td>68.2</td>
                           <td>70.5</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>71.7</td>
                           <td>73.6</td>
                           <td>71.8</td>
                           <td>73.5</td>
                        </tr>
                        <tr>
                           <td rowspan="2">CSD(Ours)</td>
                           <td/>
                           <td>Incremental</td>
                           <td>70.5</td>
                           <td>62.7</td>
                           <td>70.5</td>
                           <td>72.4</td>
                        </tr>
                        <tr>
                           <td>Joint-training</td>
                           <td>71.7</td>
                           <td>73.6</td>
                           <td>71.8</td>
                           <td>73.5</td>
                        </tr>
                     </tbody>
                  </table>
               </alternatives>
            </table-wrap>
             <p><xref ref-type="table" rid="table-1">Table 1</xref> shows the overall detection results on both the DOTA and DIOR datasets. The detection result by Fast-IL (<xref ref-type="bibr" rid="ref-44">Shmelkov, Schmid &amp; Alahari, 2017</xref>) is poor in detecting every class, as the detection framework is not effective. The Faster-IL (<xref ref-type="bibr" rid="ref-13">Hao et al., 2019b</xref>) and FPN-IL (<xref ref-type="bibr" rid="ref-5">Chen et al., 2020</xref>) are much better than Fast-IL, but the average accuracy (AA) is lower as the number of classes increases. Meta-ILOD (<xref ref-type="bibr" rid="ref-15">Joseph et al., 2021b</xref>) uses meta-learning to learn a global optimum solution without learning the similarity between classes. SID (<xref ref-type="bibr" rid="ref-31">Peng et al., 2021</xref>) employs distillation in some intermediate features, while our method performs global information distillation at various scales, resulting in better performance compared to SID. The training process of ORE (<xref ref-type="bibr" rid="ref-14">Joseph et al., 2021a</xref>) is more complicated, requiring a long pre-training period to achieve good results. Compared with the CWSD (<xref ref-type="bibr" rid="ref-8">Feng et al., 2021</xref>), the proposed method not only supplements similar features but also weights them by similarity. The proposed method has improved approximately 1% on AA compared to the four most recent methods, and as the classes increase, the detection of the new class does not show a noticeable drop.</p>
             <p>To demonstrate in more detail that the proposed method can learn the similarity information among classes well, we list the average accuracy for each class, as shown in <xref ref-type="table" rid="table-2">Table 2</xref>. In the DOTA dataset, because the class of baseball field (BF) was learned before when learning new categories such as tennis court (TC) and basketball court (BC), which have relatively similar characteristics to a baseball field (BF), the accuracy of our method in detecting these is significantly higher than that of other methods. Since our approach uses the same backbone architecture as FPN-IL, it has similar performance during joint training without having learned from similar samples. However, due to our method’s ability to fully learn similar information, it performs better when learning from similar samples later on, such as SBF, SP, HC, etc. Meta-ILOD (<xref ref-type="bibr" rid="ref-15">Joseph et al., 2021b</xref>) employs meta-learning to obtain a global optimum solution without learning inter-class similarities, while our approach conducts global information distillation at multiple scales, leading to enhanced performance in comparison. The training process of ORE (<xref ref-type="bibr" rid="ref-14">Joseph et al., 2021a</xref>) is complex, and the CWSD (<xref ref-type="bibr" rid="ref-8">Feng et al., 2021</xref>) is not in line with the continual learning setting. Therefore, the proposed method achieves roughly a 1% average improvement in AA compared to the four most recent techniques mentioned above. Although the accuracy of each class varies slightly with the learning order, the overall AA and joint training are comparable due to the learning of the old class similarity by the proposed method, and there is a significant improvement in AA when the similarity task is learned later. This shows that the proposed method is stable and effective.</p>
            <table-wrap id="table-2">
               <object-id pub-id-type="doi">10.7717/peerjcs.1583/table-2</object-id><label>Table 2</label><caption>
                  <title>The detection result (AA%) on each class in new seven classes of DOTA dataset.</title>
               </caption>
               <alternatives>
                  <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/table-2.png"/>
                  <table>
                     <colgroup>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                     </colgroup>
                     <thead>
                        <tr>
                           <th>Method</th>
                           <th>Basic Architecture</th>
                            <th>Learning Strategy</th>
                           <th align="center" colspan="7">DOTA (New seven Classes) AA%</th>
                        </tr>
                        <tr>
                           <th/>
                           <th/>
                           <th/>
                           <th>BC</th>
                           <th>ST</th>
                           <th>SBF</th>
                           <th>TR</th>
                           <th>Harbor</th>
                           <th>SP</th>
                           <th>HC</th>
                        </tr>
                     </thead>
                     <tbody>
                        <tr>
                           <td rowspan="2">Fast-IL</td>
                           <td rowspan="16">ResNet</td>
                           <td>Incremental</td>
                           <td>19.2</td>
                           <td>17.6</td>
                           <td>28.8</td>
                           <td>19.4</td>
                           <td>17.4</td>
                           <td>13.2</td>
                           <td>4.7</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>26.8</td>
                           <td>22.6</td>
                           <td>33.5</td>
                           <td>34.1</td>
                           <td>36.7</td>
                           <td>20.3</td>
                           <td>18.2</td>
                        </tr>
                        <tr>
                           <td rowspan="2">Faster-IL</td>
                           <td/>
                           <td>Incremental</td>
                           <td>36.1</td>
                           <td>26.4</td>
                           <td>36.7</td>
                           <td>47.0</td>
                           <td>42.4</td>
                           <td>35.4</td>
                           <td>9.7</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>41.5</td>
                           <td>26.9</td>
                           <td>47.4</td>
                           <td>47.7</td>
                           <td>45.1</td>
                           <td>36.1</td>
                           <td>8.1</td>
                        </tr>
                        <tr>
                           <td rowspan="2">FPN-IL</td>
                           <td/>
                           <td>Incremental</td>
                           <td>69.2</td>
                           <td>60.7</td>
                           <td>68.8</td>
                           <td>68.1</td>
                           <td>70.6</td>
                           <td>62.7</td>
                           <td>45.9</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>69.8</td>
                           <td>60.8</td>
                           <td>69.4</td>
                           <td>71.3</td>
                           <td>74.2</td>
                           <td>62.6</td>
                           <td>35.7</td>
                        </tr>
                        <tr>
                           <td rowspan="2">Meta-ILOD</td>
                           <td/>
                           <td>Incremental</td>
                           <td>69.7</td>
                           <td>60.4</td>
                           <td>69.7</td>
                           <td>69.3</td>
                           <td>71.7</td>
                           <td>63.8</td>
                           <td>45.9</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>70.3</td>
                           <td>61.3</td>
                           <td>69.5</td>
                           <td>70.8</td>
                           <td>74.2</td>
                           <td>63.3</td>
                           <td>37.1</td>
                        </tr>
                        <tr>
                           <td rowspan="2">SID</td>
                           <td/>
                           <td>Incremental</td>
                           <td>69.6</td>
                           <td>60.5</td>
                           <td>69.6</td>
                           <td>69.3</td>
                           <td>70.4</td>
                           <td>63.8</td>
                           <td>46.9</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>70.4</td>
                           <td>62.4</td>
                           <td>70.8</td>
                           <td>71.4</td>
                           <td>75.5</td>
                           <td>62.7</td>
                           <td>36.3</td>
                        </tr>
                        <tr>
                           <td rowspan="2">ORE</td>
                           <td/>
                           <td>Incremental</td>
                           <td>69.4</td>
                           <td>60.4</td>
                           <td>69.1</td>
                           <td>68.1</td>
                           <td>70.6</td>
                           <td>62.7</td>
                           <td>45.9</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>70.1</td>
                           <td>61.5</td>
                           <td>69.8</td>
                           <td>71.9</td>
                           <td>74.5</td>
                           <td>62.1</td>
                           <td>36.2</td>
                        </tr>
                        <tr>
                           <td rowspan="2">CWSD</td>
                           <td/>
                           <td>Incremental</td>
                           <td>68.4</td>
                           <td>60.2</td>
                           <td>68.6</td>
                           <td>69.2</td>
                           <td>71.3</td>
                           <td>64.2</td>
                           <td>46.8</td>
                        </tr>
                        <tr>
                           <td/>
                           <td>Joint-training</td>
                           <td>69.8</td>
                           <td>60.8</td>
                           <td>72.4</td>
                           <td>71.3</td>
                           <td>76.9</td>
                           <td>63.8</td>
                           <td>37.2</td>
                        </tr>
                        <tr>
                           <td rowspan="2">CSD(Ours)</td>
                           <td/>
                           <td>Incremental</td>
                           <td>69.5</td>
                           <td>60.7</td>
                           <td>69.1</td>
                           <td>70.4</td>
                           <td>72.5</td>
                           <td>65.3</td>
                           <td>46.8</td>
                        </tr>
                        <tr>
                           <td>Joint-training</td>
                           <td>69.8</td>
                           <td>60.8</td>
                           <td>72.4</td>
                           <td>71.3</td>
                           <td>76.9</td>
                           <td>63.8</td>
                           <td>37.2</td>
                        </tr>
                     </tbody>
                  </table>
               </alternatives>
            </table-wrap>
            <p><xref ref-type="fig" rid="fig-3">Figure 3</xref> shows the visualization detection results of the proposed method on the DOTA dataset with the truck as the old task to learn the new task sedan, and the visualization detection results with the soccer ball field (SBF) as the old task to learn the basketball court (BC) and tennis court (TC). From the detection results, we can see that our method obtains high average accuracy on both new and old classes. In contrast, other methods have many missed detections on the old class, as shown in the red box, which is because our method can learn information about the similarity between classes, preventing catastrophic forgetting while accelerating the learning of new classes.</p>
            <fig id="fig-3">
               <object-id pub-id-type="doi">10.7717/peerjcs.1583/fig-3</object-id><label>Figure 3</label><caption>
                  <title>The visualization detection results of the proposed method on the DOTA dataset.</title>
               </caption>
               <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/fig-3.png"/>
            </fig>
            <p><xref ref-type="fig" rid="fig-4">Figure 4</xref> shows the comparison of the visualization results in the DIOR dataset with low similarity of learning tasks, and since the proposed method can adjust the distillation weights adaptively according to the task similarity, it can also obtain better detection results.</p>
            <fig id="fig-4">
               <object-id pub-id-type="doi">10.7717/peerjcs.1583/fig-4</object-id><label>Figure 4</label><caption>
                  <title>The visualization detection results of the proposed method on the DIOR dataset.</title>
               </caption>
               <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/fig-4.png"/>
            </fig>
            <p>Furthermore, the heatmaps are used to verify the effectiveness of the similarity distillation method we proposed in <xref ref-type="fig" rid="fig-5">Fig. 5</xref>. In the heatmaps, the darker the color of the heat map, the more critical the area is. <xref ref-type="fig" rid="fig-1">Figure 1</xref> shows that we first learn the class SBF and then learn the class BC. From the change in the heat map of the network, the SBF in the bottom right corner of the heatmap (a) is activated. When the network continues to learn the class BC, both areas can be activated, which shows that the proposed incremental learning method can remember the previous knowledge well. Moreover, after learning BC, the activation area of SBF changes from the annular to the central square area, which shows that the network can learn the similarity features between classes.</p>
            <fig id="fig-5">
               <object-id pub-id-type="doi">10.7717/peerjcs.1583/fig-5</object-id><label>Figure 5</label><caption>
                  <title>The heatmaps to verify the effectiveness of the proposed similarity distillation.</title>
               </caption>
               <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/fig-5.png"/>
            </fig>
            <p>Based on the public natural scene image dataset VOC, we tested the class similarity distillation method to verify the effectiveness of class incremental object detection, as shown in <xref ref-type="table" rid="table-3">Table 3</xref>. For CSD in the last row, we used the settings described in the implementation details. To compare, we also replaced the CSD loss with the L2 loss to minimize the distance between the selected features. As a result of the performance of CSD on average accuracy, it is consistently superior to other methods, proving that it is more appropriate to obtain a trade-off between stability and plasticity for continuous object detection by using CSD. For 19+1 and 15+5 tasks, CSD is more effective than the L2 loss on average accuracy. Since CSD enforces the instance-level features of the incremental model to imitate the features of the old incremental model to a high degree, the performance of the old classes can be adequately maintained.</p>
            <table-wrap id="table-3">
               <object-id pub-id-type="doi">10.7717/peerjcs.1583/table-3</object-id><label>Table 3</label><caption>
                  <title>Testing the class similarity distillation way to verify the effectiveness for class incremental object detection.</title>
               </caption>
               <alternatives>
                  <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/table-3.png"/>
                  <table>
                     <colgroup>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                     </colgroup>
                     <thead>
                        <tr>
                           <th>Methods</th>
                           <th>A</th>
                           <th>B</th>
                           <th>C</th>
                           <th>D</th>
                           <th>AA</th>
                        </tr>
                     </thead>
                     <tbody>
                        <tr>
                           <td>Baseline</td>
                           <td>48.75</td>
                           <td>–</td>
                           <td>–</td>
                           <td>–</td>
                           <td>48.75</td>
                        </tr>
                        <tr>
                           <td>Baseline</td>
                           <td>44.12</td>
                           <td>58.34</td>
                            <td>–</td>
                           <td>–</td>
                           <td>51.21</td>
                        </tr>
                        <tr>
                           <td>Baseline</td>
                           <td>30.77</td>
                           <td>33.56</td>
                           <td>56.24</td>
                           <td>–</td>
                           <td>40.87</td>
                        </tr>
                        <tr>
                           <td>Baseline</td>
                           <td>15.33</td>
                           <td>18.25</td>
                           <td>43.28</td>
                           <td>35.66</td>
                           <td>28.54</td>
                        </tr>
                        <tr>
                           <td>Ours</td>
                           <td>48.75</td>
                           <td>–</td>
                           <td>–</td>
                           <td>–</td>
                           <td>48.75</td>
                        </tr>
                        <tr>
                           <td>Ours</td>
                           <td>45.25</td>
                           <td>57.88</td>
                           <td>–</td>
                           <td>–</td>
                           <td>52.24</td>
                        </tr>
                        <tr>
                           <td>Ours</td>
                           <td>31.57</td>
                           <td>34.12</td>
                           <td>57.26</td>
                           <td>–</td>
                           <td>41.23</td>
                        </tr>
                        <tr>
                           <td>Ours</td>
                           <td>16.22</td>
                           <td>19.23</td>
                           <td>44.34</td>
                           <td>36.66</td>
                           <td>29.13</td>
                        </tr>
                     </tbody>
                  </table>
               </alternatives>
            </table-wrap>
             <p>In contrast, the performance of the new classes will be suppressed at the same time. A comparison of CSD and L2 loss on average accuracy shows that CSD is more effective than L2 loss for 19+1 and 15+5 tasks. CSD enforces instance-level features of the incremental model to entirely mimic those of the old model, so that the performance of old classes can be maintained while, at the same time, the performance of new classes is suppressed.</p>
         </sec>
         <sec>
            <title>Ablation study</title>
             <p>An ablation study is performed to validate the contribution of distillation loss in the DOTA dataset. Like the experiment in <xref ref-type="table" rid="table-2">Table 2</xref>, we incrementally learn the following seven classes. The results of the ablation experiments in <xref ref-type="table" rid="table-4">Table 4</xref> show the effectiveness of the proposed CSD and GSD. In <xref ref-type="table" rid="table-4">Table 4</xref>, the second column is the result obtained without the distillation algorithm, the third and fourth columns are the AA obtained by using one distillation loss, respectively, and the last column is the result of using two distillation losses at the same time. Each distillation loss we proposed can boost AA, and the best results can be obtained when used together.</p>
            <table-wrap id="table-4">
               <object-id pub-id-type="doi">10.7717/peerjcs.1583/table-4</object-id><label>Table 4</label><caption>
                  <title>Ablation study is performed to validate the contribution of distillation loss in the DOTA dataset.</title>
               </caption>
               <alternatives>
                  <graphic mimetype="image" mime-subtype="png" xlink:href="https://peerj.com/articles/cs-1583/table-4.png"/>
                  <table>
                     <colgroup>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                        <col/>
                     </colgroup>
                     <thead>
                        <tr>
                           <th>Module</th>
                           <th>1</th>
                           <th>2</th>
                           <th>3</th>
                           <th>4</th>
                        </tr>
                     </thead>
                     <tbody>
                        <tr>
                           <td>CSD</td>
                           <td>–</td>
                           <td>✓</td>
                           <td>–</td>
                           <td>✓</td>
                        </tr>
                        <tr>
                           <td>GSD</td>
                           <td>–</td>
                           <td>–</td>
                           <td>✓</td>
                           <td>✓</td>
                        </tr>
                        <tr>
                           <td>AA</td>
                           <td>14.5</td>
                           <td>65.7</td>
                           <td>64.3</td>
                           <td>66.6</td>
                        </tr>
                     </tbody>
                  </table>
               </alternatives>
            </table-wrap>
         </sec>
      </sec>
      <sec sec-type="discussion">
         <title>Discussion</title>
          <p>Despite the promising gains that can be achieved with our proposed class similarity distillation (CSD) and global similarity distillation (GSD) for class incremental object detection in remote sensing, there are still several concerns that need to be further researched in the future. First, there is a significant discrepancy between the outcomes of sequential addition training and the outcomes of joint training in all classes, which may be caused by the gradual accumulation of mistakes during the incremental learning process. Additionally, the features chosen for correlation distillation become less accurate after numerous learning stages. Due to the lack of data and the trade-off between stability and plasticity, the performance of both old and new classes cannot be improved simultaneously.</p>
      </sec>
      <sec>
         <title>Conclusion</title>
         <p>In this article, we propose a novel class similarity distillation-based class incremental object detection method in remote sensing images that considers the similarity of new and old classes. First, class similarity distillation (CSD) was proposed to determine the plasticity and stability of the model during local distillation in the backbone of the object detector. To further mitigate catastrophic forgetting of the incremental model, we also introduced a global similarity distillation (GSD) loss to maximize the mutual information between old and new classes. Results on DOTA, DIOR, and VOC datasets demonstrate that the proposed method is effective in incremental class learning to detect objects in remote sensing images without forgetting what has previously been learned.</p>
         <p>In the future, it will be possible to combine incremental object detection with other techniques, such as those found in <xref ref-type="bibr" rid="ref-29">Morioka &amp; Hyvarinen (2023)</xref>, to maintain better feature discrimination within the incremental class procedure. We will also consider designing novel methods for classifiers and regressors to further boost class incremental object detection performance.</p>
      </sec>
      <sec sec-type="supplementary-material" id="supplemental-information">
         <title>Supplemental Information</title>
         <supplementary-material id="supp-1" mimetype="application" mime-subtype="zip" xlink:href="https://peerj.com/articles/cs-1583/code.zip">
            <object-id pub-id-type="doi">10.7717/peerj-cs.1583/supp-1</object-id><label>Supplemental Information 1</label><caption>
               <title>Code</title>
            </caption>
         </supplementary-material>
      </sec>
   </body>
   <back>
      <sec sec-type="additional-information">
         <title>Additional Information and Declarations</title>
         <fn-group content-type="competing-interests">
            <title>Competing Interests</title><fn id="conflict-1" fn-type="conflict"><p>The authors declare there are no competing interests.</p></fn></fn-group>
         <fn-group content-type="author-contributions">
            <title>Author Contributions</title><fn id="contribution-1" fn-type="con"><p><xref ref-type="contrib" rid="author-1">Mingge Shen</xref> conceived and designed the experiments, analyzed the data, authored or reviewed drafts of the article, and approved the final draft.</p></fn><fn id="contribution-2" fn-type="con"><p><xref ref-type="contrib" rid="author-2">Dehu Chen</xref> conceived and designed the experiments, performed the experiments, analyzed the data, performed the computation work, prepared figures and/or tables, authored or reviewed drafts of the article, and approved the final draft.</p></fn><fn id="contribution-3" fn-type="con"><p><xref ref-type="contrib" rid="author-3">Silan Hu</xref> analyzed the data, prepared figures and/or tables, authored or reviewed drafts of the article, and approved the final draft.</p></fn><fn id="contribution-4" fn-type="con"><p><xref ref-type="contrib" rid="author-4">Gang Xu</xref> performed the experiments, performed the computation work, prepared figures and/or tables, and approved the final draft.</p></fn></fn-group>
         <fn-group content-type="other">
            <title>Data Availability</title><fn id="addinfo-1"><p>The following information was supplied regarding data availability:</p>
            <p>The DOTA dataset is available at <ext-link ext-link-type="uri" xlink:href="https://captain-whu.github.io/DOTA/dataset.html">https://captain-whu.github.io/DOTA/dataset.html</ext-link>.</p>
            <p>Ding J, Xue N, Xia G-S, Bai X, Yang W, Yang M, Belongie S, Luo J, Datcu M, Pelillo M, Zhang L. 2021. Object Detection in Aerial Images: A Large-Scale Benchmark and Challenges. IEEE Transactions on Pattern Analysis and Machine Intelligence. DOI 10.1109/TPAMI.2021.3117983</p>
            <p>The DIOR dataset is available at: Li K, Wan G, Cheng G, Meng L, Han J. 2020. Object detection in optical remote sensing images: A survey and a new benchmark. ISPRS Journal of Photogrammetry and Remote Sensing. 159:296-307.</p></fn></fn-group>
      </sec>
      <ref-list content-type="authoryear">
         <title>References</title>
         <ref id="ref-1"><label>Ahn et al. (2019)</label><element-citation publication-type="working-paper">
               <person-group person-group-type="author">
                  <name>
                     <surname>Ahn</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Cha</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Lee</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Moon</surname>
                     <given-names>T</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2019">2019</year>
               <article-title>Uncertainty-based continual learning with adaptive regularization</article-title>
               <pub-id pub-id-type="arxiv">1905.11614</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-2"><label>Aljundi et al. (2018)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Aljundi</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Babiloni</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Elhoseiny</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Rohrbach</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Tuytelaars</surname>
                     <given-names>T</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>Memory aware synapses: learning what (not) to forget</article-title>
               <fpage>139</fpage>
               <lpage>154</lpage>
               <conf-name>Proceedings of the European Conference on Computer Vision (ECCV)</conf-name>
            </element-citation>
         </ref>
         <ref id="ref-3"><label>Aljundi, Chakravarty &amp; Tuytelaars (2017)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Aljundi</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Chakravarty</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Tuytelaars</surname>
                     <given-names>T</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Expert gate: lifelong learning with a network of experts</article-title>
               <fpage>3366</fpage>
               <lpage>3375</lpage>
               <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-4"><label>Aljundi, Kelchtermans &amp; Tuytelaars (2019)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Aljundi</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Kelchtermans</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Tuytelaars</surname>
                     <given-names>T</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2019">2019</year>
               <article-title>Task-free continual learning</article-title>
               <fpage>11254</fpage>
               <lpage>11263</lpage>
               <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-5"><label>Chen et al. (2020)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Chen</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Wang</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Chen</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Cai</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Qian</surname>
                     <given-names>Y</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2020">2020</year>
               <article-title>Incremental detection of remote sensing objects with feature pyramid and knowledge distillation</article-title>
               <source>IEEE Transactions on Geoscience and Remote Sensing</source>
               <volume>60</volume>
               <fpage>1</fpage>
               <lpage>13</lpage>
            </element-citation>
         </ref>
         <ref id="ref-6"><label>De Lange &amp; Tuytelaars (2021)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>De Lange</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Tuytelaars</surname>
                     <given-names>T</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2021">2021</year>
               <article-title>Continual prototype evolution: learning online from non-stationary data streams</article-title>
               <fpage>8250</fpage>
               <lpage>8259</lpage>
               <conf-name>Proceedings of the IEEE/CVF international conference on computer vision</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-7"><label>Dong et al. (2021)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Dong</surname>
                     <given-names>N</given-names>
                  </name>
                  <name>
                     <surname>Zhang</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Ding</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Lee</surname>
                     <given-names>GH</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2021">2021</year>
               <article-title>Bridging non co-occurrence with unlabeled in-the-wild data for incremental object detection</article-title>
               <source>Advances in Neural Information Processing Systems</source>
               <volume>34</volume>
               <fpage>30492</fpage>
               <lpage>30503</lpage>
            </element-citation>
         </ref>
         <ref id="ref-8"><label>Feng et al. (2021)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Feng</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Sun</surname>
                     <given-names>X</given-names>
                  </name>
                  <name>
                     <surname>Diao</surname>
                     <given-names>W</given-names>
                  </name>
                  <name>
                     <surname>Li</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Gao</surname>
                     <given-names>X</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2021">2021</year>
               <article-title>Double similarity distillation for semantic image segmentation</article-title>
               <source>IEEE Transactions on Image Processing</source>
               <volume>30</volume>
               <fpage>5363</fpage>
               <lpage>5376</lpage>
               <pub-id pub-id-type="doi">10.1109/TIP.2021.3083113</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-9"><label>Fernando et al. (2017)</label><element-citation publication-type="working-paper">
               <person-group person-group-type="author">
                  <name>
                     <surname>Fernando</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Banarse</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Blundell</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Zwols</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Ha</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Rusu</surname>
                     <given-names>AA</given-names>
                  </name>
                  <name>
                     <surname>Pritzel</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Wierstra</surname>
                     <given-names>D</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Pathnet: evolution channels gradient descent in super neural networks</article-title>
               <pub-id pub-id-type="arxiv">1701.08734</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-10"><label>Girshick (2015)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Girshick</surname>
                     <given-names>R</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2015">2015</year>
               <article-title>Fast r-cnn</article-title>
               <fpage>1440</fpage>
               <lpage>1448</lpage>
               <conf-name>Proceedings of the IEEE international conference on computer vision</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-11"><label>Goodfellow et al. (2014)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Goodfellow</surname>
                     <given-names>I</given-names>
                  </name>
                  <name>
                     <surname>Pouget-Abadie</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Mirza</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Xu</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Warde-Farley</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Ozair</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Courville</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Bengio</surname>
                     <given-names>Y</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2014">2014</year>
               <article-title>Generative adversarial nets</article-title>
               <conf-name>Proceedings of international conference neural information processing systems</conf-name>
               <fpage>2672</fpage>
               <lpage>2680</lpage>
            </element-citation>
         </ref>
         <ref id="ref-12"><label>Hao, Fu &amp; Jiang (2019a)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Hao</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Fu</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Jiang</surname>
                     <given-names>Y-G</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2019">2019a</year>
               <article-title>Take goods from shelves: a dataset for class-incremental object detection</article-title>
               <fpage>271</fpage>
               <lpage>278</lpage>
               <conf-name>Proceedings of the 2019 on international conference on multimedia retrieval</conf-name>
            </element-citation>
         </ref>
         <ref id="ref-13"><label>Hao et al. (2019b)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Hao</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Fu</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Jiang</surname>
                     <given-names>Y-G</given-names>
                  </name>
                  <name>
                     <surname>Tian</surname>
                     <given-names>Q</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2019">2019b</year>
               <article-title>An end-to-end architecture for class-incremental object detection with knowledge distillation</article-title>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
               <fpage>1</fpage>
               <lpage>6</lpage>
               <conf-name>2019 IEEE International Conference on Multimedia and Expo (ICME)</conf-name>
            </element-citation>
         </ref>
         <ref id="ref-14"><label>Joseph et al. (2021a)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Joseph</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Khan</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Khan</surname>
                     <given-names>FS</given-names>
                  </name>
                  <name>
                     <surname>Balasubramanian</surname>
                     <given-names>VN</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2021">2021a</year>
               <article-title>Towards open world object detection</article-title>
               <fpage>5830</fpage>
               <lpage>5840</lpage>
               <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-15"><label>Joseph et al. (2021b)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Joseph</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Rajasegaran</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Khan</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Khan</surname>
                     <given-names>FS</given-names>
                  </name>
                  <name>
                     <surname>Balasubramanian</surname>
                     <given-names>VN</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2021">2021b</year>
               <article-title>Incremental object detection via meta-learning</article-title>
               <source>IEEE Transactions on Pattern Analysis and Machine Intelligence</source>
               <volume>44</volume>
               <issue>12</issue>
               <fpage>9209</fpage>
               <lpage>9216</lpage>
            </element-citation>
         </ref>
         <ref id="ref-16"><label>Kirkpatrick et al. (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Kirkpatrick</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Pascanu</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Rabinowitz</surname>
                     <given-names>N</given-names>
                  </name>
                  <name>
                     <surname>Veness</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Desjardins</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Rusu</surname>
                     <given-names>AA</given-names>
                  </name>
                  <name>
                     <surname>Milan</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Quan</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Ramalho</surname>
                     <given-names>T</given-names>
                  </name>
                  <name>
                     <surname>Grabska-Barwinska</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Hassabis</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Clopath</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Kumaran</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Hadsell</surname>
                     <given-names>R</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Overcoming catastrophic forgetting in neural networks</article-title>
               <source>Proceedings of the National Academy of Sciences of the United States of America</source>
               <volume>114</volume>
               <issue>13</issue>
               <fpage>3521</fpage>
               <lpage>3526</lpage>
            </element-citation>
         </ref>
         <ref id="ref-17"><label>Lange et al. (2020)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Lange</surname>
                     <given-names>MD</given-names>
                  </name>
                  <name>
                     <surname>Jia</surname>
                     <given-names>X</given-names>
                  </name>
                  <name>
                     <surname>Parisot</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Leonardis</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Slabaugh</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Tuytelaars</surname>
                     <given-names>T</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2020">2020</year>
               <article-title>Unsupervised model personalization while preserving privacy and scalability: an open problem</article-title>
               <fpage>14463</fpage>
               <lpage>14472</lpage>
               <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-18"><label>Lee et al. (2019)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Lee</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Lee</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Shin</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Lee</surname>
                     <given-names>H</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2019">2019</year>
               <article-title>Overcoming catastrophic forgetting with unlabeled data in the wild</article-title>
               <fpage>312</fpage>
               <lpage>321</lpage>
               <conf-name>Proceedings of the IEEE/CVF international conference on computer vision</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-19"><label>Li et al. (2022)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Li</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Chen</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Zhang</surname>
                     <given-names>Z</given-names>
                  </name>
                  <name>
                     <surname>Peng</surname>
                     <given-names>J</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2022">2022</year>
               <article-title>Raise: rank-aware incremental learning for remote sensing object detection</article-title>
               <source>Symmetry</source>
               <volume>14</volume>
               <issue>5</issue>
               <fpage>1020</fpage>
               <pub-id pub-id-type="doi">10.3390/sym14051020</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-20"><label>Li et al. (2020)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Li</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Wan</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Cheng</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Meng</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Han</surname>
                     <given-names>J</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2020">2020</year>
               <article-title>Object detection in optical remote sensing images: a survey and a new benchmark</article-title>
               <source>ISPRS Journal of Photogrammetry and Remote Sensing</source>
               <volume>159</volume>
               <fpage>296</fpage>
               <lpage>307</lpage>
               <pub-id pub-id-type="doi">10.1016/j.isprsjprs.2019.11.023</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-21"><label>Li &amp; Hoiem (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Li</surname>
                     <given-names>Z</given-names>
                  </name>
                  <name>
                     <surname>Hoiem</surname>
                     <given-names>D</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Learning without forgetting</article-title>
               <source>IEEE Transactions on Pattern Analysis and Machine Intelligence</source>
               <volume>40</volume>
               <issue>12</issue>
               <fpage>2935</fpage>
               <lpage>2947</lpage>
            </element-citation>
         </ref>
         <ref id="ref-22"><label>Lin et al. (2017)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Lin</surname>
                     <given-names>T-Y</given-names>
                  </name>
                  <name>
                     <surname>Dollár</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Girshick</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>He</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Hariharan</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Belongie</surname>
                     <given-names>S</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Feature pyramid networks for object detection</article-title>
               <fpage>2117</fpage>
               <lpage>2125</lpage>
               <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-23"><label>Liu et al. (2020a)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Liu</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Kuang</surname>
                     <given-names>Z</given-names>
                  </name>
                  <name>
                     <surname>Chen</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Xue</surname>
                     <given-names>J-H</given-names>
                  </name>
                  <name>
                     <surname>Yang</surname>
                     <given-names>W</given-names>
                  </name>
                  <name>
                     <surname>Zhang</surname>
                     <given-names>W</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2020">2020a</year>
               <article-title>Incdet: in defense of elastic weight consolidation for incremental object detection</article-title>
               <source>IEEE Transactions on Neural Networks and Learning Systems</source>
               <volume>32</volume>
               <issue>6</issue>
               <fpage>2306</fpage>
               <lpage>2319</lpage>
            </element-citation>
         </ref>
         <ref id="ref-24"><label>Liu et al. (2020b)</label><element-citation publication-type="working-paper">
               <person-group person-group-type="author">
                  <name>
                     <surname>Liu</surname>
                     <given-names>X</given-names>
                  </name>
                  <name>
                     <surname>Yang</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Ravichandran</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Bhotika</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Soatto</surname>
                     <given-names>S</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2020">2020b</year>
               <article-title>Multi-task incremental learning for object detection</article-title>
               <pub-id pub-id-type="arxiv">2002.05347</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-25"><label>Lopez-Paz &amp; Ranzato (2017)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Lopez-Paz</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Ranzato</surname>
                     <given-names>M</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Gradient episodic memory for continual learning</article-title>
                <conf-name>Proceedings of the international conference on neural information processing systems</conf-name>
               <fpage>6467</fpage>
               <lpage>6476</lpage>
            </element-citation>
         </ref>
         <ref id="ref-26"><label>Mallya &amp; Lazebnik (2018)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Mallya</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Lazebnik</surname>
                     <given-names>S</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>Packnet: adding multiple tasks to a single network by iterative pruning</article-title>
               <fpage>7765</fpage>
               <lpage>7773</lpage>
               <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>
            </element-citation>
         </ref>
         <ref id="ref-27"><label>Masana et al. (2020)</label><element-citation publication-type="working-paper">
               <person-group person-group-type="author">
                  <name>
                     <surname>Masana</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Liu</surname>
                     <given-names>X</given-names>
                  </name>
                  <name>
                     <surname>Twardowski</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Menta</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Bagdanov</surname>
                     <given-names>AD</given-names>
                  </name>
                  <name>
                     <surname>Van de Weijer</surname>
                     <given-names>J</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2020">2020</year>
               <article-title>Class-incremental learning: survey and performance evaluation on image classification</article-title>
               <pub-id pub-id-type="arxiv">2010.15277</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-28"><label>Ming et al. (2021)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Ming</surname>
                     <given-names>Q</given-names>
                  </name>
                  <name>
                     <surname>Miao</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Zhou</surname>
                     <given-names>Z</given-names>
                  </name>
                  <name>
                     <surname>Dong</surname>
                     <given-names>Y</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2021">2021</year>
               <article-title>Cfc-net: a critical feature capturing network for arbitrary-oriented object detection in remote-sensing images</article-title>
               <source>IEEE Transactions on Geoscience and Remote Sensing</source>
               <volume>60</volume>
               <fpage>1</fpage>
               <lpage>14</lpage>
            </element-citation>
         </ref>
          <ref id="ref-29"><label>Morioka &amp; Hyvärinen (2023)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Morioka</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                      <surname>Hyvärinen</surname>
                     <given-names>A</given-names>
                  </name>
               </person-group>
               <article-title>Connectivity-contrastive learning: combining causal discovery and representation learning for multimodal data</article-title>
               <conf-name>International conference on artificial intelligence and statistics</conf-name>
               <year iso-8601-date="2023">2023</year>
               <conf-sponsor>PMLR</conf-sponsor>
               <fpage>3399</fpage>
               <lpage>3426</lpage>
            </element-citation>
         </ref>
         <ref id="ref-30"><label>Nguyen, Ngo &amp; Nguyen-Xuan (2017)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Nguyen</surname>
                     <given-names>TN</given-names>
                  </name>
                  <name>
                     <surname>Ngo</surname>
                     <given-names>TD</given-names>
                  </name>
                  <name>
                     <surname>Nguyen-Xuan</surname>
                     <given-names>H</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>A novel three-variable shear deformation plate formulation: theory and isogeometric implementation</article-title>
               <source>Computer Methods in Applied Mechanics and Engineering</source>
               <volume>326</volume>
               <fpage>376</fpage>
               <lpage>401</lpage>
               <pub-id pub-id-type="doi">10.1016/j.cma.2017.07.024</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-31"><label>Peng et al. (2021)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Peng</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Zhao</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Maksoud</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Li</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Lovell</surname>
                     <given-names>BC</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2021">2021</year>
               <article-title>Sid: incremental learning for anchor-free object detection via selective and inter-related distillation</article-title>
               <source>Computer Vision and Image Understanding</source>
               <volume>210</volume>
               <fpage>103229</fpage>
               <pub-id pub-id-type="doi">10.1016/j.cviu.2021.103229</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-32"><label>Perkonigg et al. (2021)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Perkonigg</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Hofmanninger</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Herold</surname>
                     <given-names>CJ</given-names>
                  </name>
                  <name>
                     <surname>Brink</surname>
                     <given-names>JA</given-names>
                  </name>
                  <name>
                     <surname>Pianykh</surname>
                     <given-names>O</given-names>
                  </name>
                  <name>
                     <surname>Prosch</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Langs</surname>
                     <given-names>G</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2021">2021</year>
               <article-title>Dynamic memory to alleviate catastrophic forgetting in continual learning with medical imaging</article-title>
               <source>Nature Communications</source>
               <volume>12</volume>
               <issue>1</issue>
               <fpage>1</fpage>
               <lpage>12</lpage>
               <pub-id pub-id-type="doi">10.1038/s41467-021-25858-z</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-33"><label>Rebuffi et al. (2017)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Rebuffi</surname>
                     <given-names>S-A</given-names>
                  </name>
                  <name>
                     <surname>Kolesnikov</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Sperl</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Lampert</surname>
                     <given-names>CH</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>icarl: incremental classifier and representation learning</article-title>
               <fpage>2001</fpage>
               <lpage>2010</lpage>
               <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-34"><label>Redmon et al. (2016)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Redmon</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Divvala</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Girshick</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Farhadi</surname>
                     <given-names>A</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2016">2016</year>
               <article-title>You only look once: unified, real-time object detection</article-title>
               <fpage>779</fpage>
               <lpage>788</lpage>
               <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-35"><label>Redmon &amp; Farhadi (2017)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Redmon</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Farhadi</surname>
                     <given-names>A</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Yolo9000: better, faster, stronger</article-title>
               <fpage>7263</fpage>
               <lpage>7271</lpage>
               <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-36"><label>Redmon &amp; Farhadi (2018)</label><element-citation publication-type="working-paper">
               <person-group person-group-type="author">
                  <name>
                     <surname>Redmon</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Farhadi</surname>
                     <given-names>A</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>Yolov3: an incremental improvement</article-title>
               <pub-id pub-id-type="arxiv">1804.02767</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-37"><label>Ren et al. (2015)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Ren</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>He</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Girshick</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Sun</surname>
                     <given-names>J</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2015">2015</year>
               <article-title>Faster r-cnn: towards real-time object detection with region proposal networks</article-title>
                <conf-name>Proceedings of the international conference on neural information processing systems</conf-name>
               <fpage>91</fpage>
               <lpage>99</lpage>
            </element-citation>
         </ref>
         <ref id="ref-38"><label>Robins (1995)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Robins</surname>
                     <given-names>A</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="1995">1995</year>
               <article-title>Catastrophic forgetting, rehearsal and pseudorehearsal</article-title>
               <source>Connection Science</source>
               <volume>7</volume>
               <issue>2</issue>
               <fpage>123</fpage>
               <lpage>146</lpage>
               <pub-id pub-id-type="doi">10.1080/09540099550039318</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-39"><label>Rolnick et al. (2019)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Rolnick</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Ahuja</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Schwarz</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Lillicrap</surname>
                     <given-names>T</given-names>
                  </name>
                  <name>
                     <surname>Wayne</surname>
                     <given-names>G</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2019">2019</year>
               <article-title>Experience replay for continual learning</article-title>
               <source>Advances in Neural Information Processing Systems</source>
               <volume>32</volume>
               <fpage>350</fpage>
               <lpage>360</lpage>
            </element-citation>
         </ref>
         <ref id="ref-40"><label>Rusu et al. (2016)</label><element-citation publication-type="working-paper">
               <person-group person-group-type="author">
                  <name>
                     <surname>Rusu</surname>
                     <given-names>AA</given-names>
                  </name>
                  <name>
                     <surname>Rabinowitz</surname>
                     <given-names>NC</given-names>
                  </name>
                  <name>
                     <surname>Desjardins</surname>
                     <given-names>G</given-names>
                  </name>
                  <name>
                     <surname>Soyer</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Kirkpatrick</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Kavukcuoglu</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Pascanu</surname>
                     <given-names>R</given-names>
                  </name>
                  <name>
                     <surname>Hadsell</surname>
                     <given-names>R</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2016">2016</year>
               <article-title>Progressive neural networks</article-title>
               <pub-id pub-id-type="arxiv">1606.04671</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-41"><label>Serra et al. (2018)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Serra</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Suris</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Miron</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Karatzoglou</surname>
                     <given-names>A</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>Overcoming catastrophic forgetting with hard attention to the task</article-title>
               <conf-sponsor>PMLR</conf-sponsor>
               <fpage>4548</fpage>
               <lpage>4557</lpage>
               <conf-name>International Conference on Machine Learning</conf-name>
            </element-citation>
         </ref>
         <ref id="ref-42"><label>Shieh et al. (2020)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Shieh</surname>
                     <given-names>J-L</given-names>
                  </name>
                  <name>
                     <surname>Haq</surname>
                     <given-names>QMU</given-names>
                  </name>
                  <name>
                     <surname>Haq</surname>
                     <given-names>MA</given-names>
                  </name>
                  <name>
                     <surname>Karam</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Chondro</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Gao</surname>
                     <given-names>D-Q</given-names>
                  </name>
                  <name>
                     <surname>Ruan</surname>
                     <given-names>S-J</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2020">2020</year>
               <article-title>Continual learning strategy in one-stage object detection framework based on experience replay for autonomous driving vehicle</article-title>
               <source>Sensors</source>
               <volume>20</volume>
               <issue>23</issue>
               <fpage>6777</fpage>
               <pub-id pub-id-type="doi">10.3390/s20236777</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-43"><label>Shin et al. (2017)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Shin</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Lee</surname>
                     <given-names>JK</given-names>
                  </name>
                  <name>
                     <surname>Kim</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Kim</surname>
                     <given-names>J</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Continual learning with deep generative replay</article-title>
                <conf-name>Proceedings of the international conference on neural information processing systems</conf-name>
               <fpage>2994</fpage>
               <lpage>3003</lpage>
            </element-citation>
         </ref>
         <ref id="ref-44"><label>Shmelkov, Schmid &amp; Alahari (2017)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Shmelkov</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Schmid</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Alahari</surname>
                     <given-names>K</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Incremental learning of object detectors without catastrophic forgetting</article-title>
               <fpage>3400</fpage>
               <lpage>3409</lpage>
               <conf-name>Proceedings of the IEEE international conference on computer vision</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-45"><label>Simon et al. (2022)</label><element-citation publication-type="working-paper">
               <person-group person-group-type="author">
                  <name>
                     <surname>Simon</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Faraki</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Tsai</surname>
                     <given-names>Y-H</given-names>
                  </name>
                  <name>
                     <surname>Yu</surname>
                     <given-names>X</given-names>
                  </name>
                  <name>
                     <surname>Schulter</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Suh</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Harandi</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Chandraker</surname>
                     <given-names>M</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2022">2022</year>
               <article-title>On generalizing beyond domains in cross-domain continual learning</article-title>
               <pub-id pub-id-type="arxiv">2203.03970</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-46"><label>Tian et al. (2019)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Tian</surname>
                     <given-names>Z</given-names>
                  </name>
                  <name>
                     <surname>Shen</surname>
                     <given-names>C</given-names>
                  </name>
                  <name>
                     <surname>Chen</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>He</surname>
                     <given-names>T</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2019">2019</year>
               <article-title>Fcos: fully convolutional one-stage object detection</article-title>
               <fpage>9627</fpage>
               <lpage>9636</lpage>
               <conf-name>Proceedings of the IEEE/CVF international conference on computer vision</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-47"><label>Ul Haq et al. (2021)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Ul Haq</surname>
                     <given-names>QM</given-names>
                  </name>
                  <name>
                     <surname>Ruan</surname>
                     <given-names>S-J</given-names>
                  </name>
                  <name>
                     <surname>Haq</surname>
                     <given-names>MA</given-names>
                  </name>
                  <name>
                     <surname>Karam</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Shieh</surname>
                     <given-names>JL</given-names>
                  </name>
                  <name>
                     <surname>Chondro</surname>
                     <given-names>P</given-names>
                  </name>
                  <name>
                     <surname>Gao</surname>
                     <given-names>D-Q</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2021">2021</year>
               <article-title>An incremental learning of yolov3 without catastrophic forgetting for smart city applications</article-title>
               <source>IEEE Consumer Electronics Magazine</source>
               <volume>11</volume>
               <issue>5</issue>
               <fpage>56</fpage>
               <lpage>63</lpage>
            </element-citation>
         </ref>
         <ref id="ref-48"><label>Xia et al. (2018)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Xia</surname>
                     <given-names>G-S</given-names>
                  </name>
                  <name>
                     <surname>Bai</surname>
                     <given-names>X</given-names>
                  </name>
                  <name>
                     <surname>Ding</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Zhu</surname>
                     <given-names>Z</given-names>
                  </name>
                  <name>
                     <surname>Belongie</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Luo</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>Datcu</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Pelillo</surname>
                     <given-names>M</given-names>
                  </name>
                  <name>
                     <surname>Zhang</surname>
                     <given-names>L</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2018">2018</year>
               <article-title>Dota: a large-scale dataset for object detection in aerial images</article-title>
               <fpage>3974</fpage>
               <lpage>3983</lpage>
               <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-49"><label>Xiaolin et al. (2022)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Xiaolin</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Fan</surname>
                     <given-names>H</given-names>
                  </name>
                  <name>
                     <surname>Ming</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Tongxin</surname>
                     <given-names>Z</given-names>
                  </name>
                  <name>
                     <surname>Ran</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Zenghui</surname>
                     <given-names>Z</given-names>
                  </name>
                  <name>
                     <surname>Zhiyuan</surname>
                     <given-names>G</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2022">2022</year>
               <article-title>Small object detection in remote sensing images based on super-resolution</article-title>
               <source>Pattern Recognition Letters</source>
               <volume>153</volume>
               <fpage>107</fpage>
               <lpage>112</lpage>
               <pub-id pub-id-type="doi">10.1016/j.patrec.2021.11.027</pub-id>
            </element-citation>
         </ref>
         <ref id="ref-50"><label>Yan, Xie &amp; He (2021)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Yan</surname>
                     <given-names>S</given-names>
                  </name>
                  <name>
                     <surname>Xie</surname>
                     <given-names>J</given-names>
                  </name>
                  <name>
                     <surname>He</surname>
                     <given-names>X</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2021">2021</year>
               <article-title>Der: dynamically expandable representation for class incremental learning</article-title>
               <fpage>3014</fpage>
               <lpage>3023</lpage>
               <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-51"><label>Yang et al. (2022)</label><element-citation publication-type="journal">
               <person-group person-group-type="author">
                  <name>
                     <surname>Yang</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Zhou</surname>
                     <given-names>Y</given-names>
                  </name>
                  <name>
                     <surname>Zhang</surname>
                     <given-names>A</given-names>
                  </name>
                  <name>
                     <surname>Sun</surname>
                     <given-names>X</given-names>
                  </name>
                  <name>
                     <surname>Wu</surname>
                     <given-names>D</given-names>
                  </name>
                  <name>
                     <surname>Wang</surname>
                     <given-names>W</given-names>
                  </name>
                  <name>
                     <surname>Ye</surname>
                     <given-names>Q</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2022">2022</year>
               <article-title>Multi-view correlation distillation for incremental object detection</article-title>
               <source>Pattern Recognition</source>
               <volume>131</volume>
               <fpage>108863</fpage>
               <pub-id pub-id-type="doi">10.1016/j.patcog.2022.108863</pub-id>
            </element-citation>
         </ref>
          <ref id="ref-52"><label>Yang &amp; Cai (2023)</label><element-citation publication-type="journal">
                <person-group person-group-type="author">
                   <name>
                      <surname>Yang</surname>
                      <given-names>S</given-names>
                   </name>
                   <name>
                      <surname>Cai</surname>
                      <given-names>Z</given-names>
                   </name>
                </person-group>
                <year iso-8601-date="2023">2023</year>
                <article-title>Cross domain lifelong learning based on task similarity</article-title>
                <source>IEEE Transactions on Pattern Analysis and Machine Intelligence</source>
                <publisher-name>IEEE</publisher-name>
                <publisher-loc>Piscataway</publisher-loc>
             </element-citation>
         </ref>
         <ref id="ref-53"><label>Ye &amp; Bors (2021)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Ye</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Bors</surname>
                     <given-names>AG</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2021">2021</year>
               <article-title>Lifelong infinite mixture model based on knowledge-driven dirichlet process</article-title>
               <fpage>10695</fpage>
               <lpage>10704</lpage>
               <conf-name>Proceedings of the IEEE/CVF international conference on computer vision</conf-name>
               <conf-sponsor>IEEE</conf-sponsor>
               <conf-loc>Piscataway</conf-loc>
            </element-citation>
         </ref>
         <ref id="ref-54"><label>Zenke, Poole &amp; Ganguli (2017)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Zenke</surname>
                     <given-names>F</given-names>
                  </name>
                  <name>
                     <surname>Poole</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Ganguli</surname>
                     <given-names>S</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2017">2017</year>
               <article-title>Continual learning through synaptic intelligence</article-title>
               <fpage>3987</fpage>
               <lpage>3995</lpage>
               <conf-name>International Conference on Machine Learning</conf-name>
            </element-citation>
         </ref>
         <ref id="ref-55"><label>Zhang et al. (2021)</label><element-citation publication-type="confproc">
               <person-group person-group-type="author">
                  <name>
                     <surname>Zhang</surname>
                     <given-names>N</given-names>
                  </name>
                  <name>
                     <surname>Sun</surname>
                     <given-names>Z</given-names>
                  </name>
                  <name>
                     <surname>Zhang</surname>
                     <given-names>K</given-names>
                  </name>
                  <name>
                     <surname>Xiao</surname>
                     <given-names>L</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2021">2021</year>
               <article-title>Incremental learning of object detection with output merging of compact expert detectors</article-title>
               <conf-name>2021 4th international conference on intelligent autonomous systems (ICoIAS)</conf-name>
            </element-citation>
         </ref>
         <ref id="ref-56"><label>Zhu et al. (2020)</label><element-citation publication-type="working-paper">
               <person-group person-group-type="author">
                  <name>
                     <surname>Zhu</surname>
                     <given-names>X</given-names>
                  </name>
                  <name>
                     <surname>Su</surname>
                     <given-names>W</given-names>
                  </name>
                  <name>
                     <surname>Lu</surname>
                     <given-names>L</given-names>
                  </name>
                  <name>
                     <surname>Li</surname>
                     <given-names>B</given-names>
                  </name>
                  <name>
                     <surname>Wang</surname>
                     <given-names>X</given-names>
                  </name>
                  <name>
                     <surname>Dai</surname>
                     <given-names>J</given-names>
                  </name>
               </person-group>
               <year iso-8601-date="2020">2020</year>
               <article-title>Deformable detr: deformable transformers for end-to-end object detection</article-title>
               <pub-id pub-id-type="arxiv">2010.04159</pub-id>
            </element-citation>
         </ref>
      </ref-list>
   </back>
</article>
