% Journal article record with bilingual metadata (English plus Persian).
% title_fa, abstract_fa, keywords_fa and eissn are non-standard fields;
% standard bibliography styles silently ignore field names they do not know.
% Percent signs in the abstract are escaped so the field is safe if typeset.
@article{moghimi2022spectral,
  author      = {Moghimi, A. and Sazgarnia, A. and Aghkhani, M. H.},
  title       = {Spectral Feature Selection from the Hyperspectral Dataset to Identify Pistachio Leaves Infected by {Psylla}},
  journal     = {Journal of Agricultural Machinery},
  volume      = {12},
  number      = {2},
  pages       = {159--167},
  year        = {2022},
  publisher   = {Ferdowsi University of Mashhad},
  issn        = {2228-6829},
  eissn       = {2423-3943},
  doi         = {10.22067/jam.v12i2.82089},
  abstract    = {Introduction: Pistachio production has been adversely affected by Psylla, which is a devastating insect. The primary goal of this study was to select sensitive spectral bands to distinguish pistachio leaves infected by Psylla from healthy leaves. Diagnosis of psylla disease before the onset of visual cues is crucial for making decisions about topical garden management. Since it is not possible to diagnose psylla disease even after the onset of symptoms with the help of color images by drones, hyperspectral and multispectral sensors are needed. The main purpose of this study was to extract spectral bands suitable for distinguishing healthy leaves from psylla leaves. For this purpose, in this paper, a new method for selecting sensitive spectral properties from hyperspectral data with the high spectral resolution is presented. The intelligent selection of sensitive bands is a convenient way to build multispectral sensors for a specific application (in this article, the diagnosis of psylla leaves). Knowledge of disease-sensitive wavelengths can also help researchers analyze multispectral and hyperspectral aerial images captured by satellites or drones.
    Materials and Methods: A total number of 160 healthy and diseased leaves were scanned in 64 spectral bands between 400-1100 nm with 10 nm spectral resolution. A random forest algorithm was used to identify the importance of features in classifying the dataset into diseased and healthy leaves.
    After computing the importance of the features, a clustering algorithm was developed to cluster the most important features into six clusters such that the center of clusters was 50 nm apart. To transfer the hyperspectral dataset into a multispectral dataset, the reflectance was averaged in spectral bands within ±15 nm of each cluster center and achieved six broad multispectral bands. Afterwards a support vector machine algorithm was utilized to classify the diseased and healthy leaves using both hyperspectral and multispectral datasets.
    Results and Discussion: The center of clusters were 468 nm, 598 nm, 710 nm, 791 nm, 858 nm, and 1023 nm, which were calculated by taking the average of all the members assigned to the individual clusters. These are the most informative spectral bands to distinguish the pistachio leaves infected by Psylla from the healthy leaves. The F1-score was 90.91 when the hyperspectral dataset (all bands) was used, while the F1-score was 88.69 for the multispectral dataset. The subtle difference between the F1-scores indicates that the proposed pipeline in this study was able to select appropriately the sensitive bands while retaining all relevant information.
    Conclusion: The importance of spectral bands in the visible and near-infrared region (between 400 and 1100 nm) was obtained to identify pistachio tree leaves infected with psylla disease. Based on the importance of spectral properties and using a clustering algorithm, six wavelengths were obtained as the best wavelengths for classifying healthy and diseased pistachio leaves. Then, by averaging the wavelengths at a distance of 15 nm from these six centers, the hyperspectral data (64 bands) became multispectral (6 bands). Since the correlation between the wavelengths in the near-infrared region was very high (more than 95\%), out of the three selected wavelengths in the near-infrared region (710, 791, and 1023), only the 710-nm wavelength, which was closer to the visible region, was selected.
    The results of classification of infected and diseased leaves using hyperspectral and multispectral data showed that the degree of classification accuracy decreases by about 2\% and if only 4 bands are used, the degree of accuracy decreases by about 3\%. The results of this study revealed that the proposed framework could be used for selecting the most informative spectral bands and accordingly develop custom-designed multispectral sensors for disease detection in pistachio. In addition, we could reduce the dimensionality of the hyperspectral datasets and avoid the issues related to the curse of dimensionality.},
  keywords    = {Classification,Feature selection,hyperspectral data,multispectral data,Pistachio,Psylla,random forest,Spectroscopy},
  title_fa    = {انتخاب طول‌موج‌های‌ طیفی حساس از میان داده‌های فراطیفی جهت تشخیص برگ‌های درخت پسته آلوده به پسیل},
  abstract_fa = {در سال‌های اخیر تولید پسته توسط آفتی به نام پسیل تهدید شده است. هدف از انجام این تحقیق انتخاب باندهای طیفی مناسب جهت تشخیص برگ‌های آلوده به آفت پسیل می‌باشد. برای این منظور از 160 برگ سالم و 160 برگ بیمار در 64 باند طیفی در ناحیه بین 400 تا 1100 نانومتر با قدرت تفکیک طیفی حدوداً 10 نانومتر طیف‎نگاری انجام شد. از روش جنگل‌های تصادفی برای مرتب‌سازی طول‌موج‌ها بر اساس قدرت تفکیک بین برگ‌های سالم و بیمار استفاده شد. پس از شناسایی و مرتب‌سازی طول‌موج‌ها بر اساس میزان اهمیت، طول‌موج‌های دارای بیشترین اهمیت توسط یک الگوریتم خوشه‌بندی در شش خوشه تقسیم‌بندی شدند به‌طوری‌که میانگین طول‌موج‌های هر خوشه‌ها حداقل 50 نانومتر از یکدیگر فاصله داشته باشند. میانگین طول‌موج‌های قرار گرفته در شش خوشه به‌ترتیب اهمیت عبارت بود از: 710، 791، 858، 598، 468، و 1023 نانومتر. برای تبدیل داده‌های فراطیفی به چندطیفی، از بازتابش‌های به‌دست آمده در فاصله 15± نانومتری این مراکز میانگین‌گیری انجام گرفت و داده‌های بازتابش به‌دست آمده از دیگر طیف‌ها حذف شد. الگوریتم ماشین بردار پشتیبان برای طبقه‌بندی برگ‌های سالم و بیمار با استفاده از داده‌های فراطیفی و چند طیفی به‌دست آمده در این پژوهش به‌کار گرفته شد.
    دقت طبقه‌بندی با استفاده از تمام 64 طول‌موج (داده‌های فراطیفی) 90.91 درصد و با استفاده از 6 طول‌موج (داده‌های چندطیفی) 88.69 درصد بود. اختلاف بسیار کم (حدود 2 درصد) در میزان دقت طبقه‌بندی نشان‌دهنده صحت شیوه ارائه شده در این پژوهش برای کاهش ابعاد داده‌های فراطیفی می‌باشد. علاوه بر کاهش ابعاد داده، تعیین باندهای طیفی مناسب از میان داده‌های فراطیفی گامی موثر در طراحی و ساخت حسگری چندطیفی جهت تشخیص بیماری گیاهان می‌باشد.},
  keywords_fa = {انتخاب ویژگی,پسته,جنگل‌های تصادفی,طیف‌سنجی,فراطیفی},
  url         = {https://jame.um.ac.ir/article_35124.html},
  eprint      = {https://jame.um.ac.ir/article_35124_ef4750e7d51ae17d589d836625ce4f3e.pdf},
}